Merge branch '189-cache-deletion' into 'develop'

Resolve "Federation cached tracks not cleaned by celery task"

Closes #189

See merge request funkwhale/funkwhale!183
This commit is contained in:
Eliot Berriot 2018-05-06 13:49:50 +00:00
commit 72c3c0fd85
7 changed files with 115 additions and 21 deletions

View File

@ -1,8 +1,10 @@
import datetime import datetime
import json import json
import logging import logging
import os
from django.conf import settings from django.conf import settings
from django.db.models import Q
from django.utils import timezone from django.utils import timezone
from requests.exceptions import RequestException from requests.exceptions import RequestException
@ -96,16 +98,38 @@ def clean_music_cache():
delay = preferences['federation__music_cache_duration'] delay = preferences['federation__music_cache_duration']
if delay < 1: if delay < 1:
return # cache clearing disabled return # cache clearing disabled
limit = timezone.now() - datetime.timedelta(minutes=delay)
candidates = models.LibraryTrack.objects.filter( candidates = models.LibraryTrack.objects.filter(
audio_file__isnull=False Q(audio_file__isnull=False) & (
).values_list('local_track_file__track', flat=True) Q(local_track_file__accessed_date__lt=limit) |
listenings = Listening.objects.filter( Q(local_track_file__accessed_date=None)
creation_date__gte=timezone.now() - datetime.timedelta(minutes=delay), )
track__pk__in=candidates).values_list('track', flat=True) ).exclude(audio_file='').only('audio_file', 'id')
too_old = set(candidates) - set(listenings) for lt in candidates:
to_remove = models.LibraryTrack.objects.filter(
local_track_file__track__pk__in=too_old).only('audio_file')
for lt in to_remove:
lt.audio_file.delete() lt.audio_file.delete()
# we also delete orphaned files, if any
storage = models.LibraryTrack._meta.get_field('audio_file').storage
files = get_files(storage, 'federation_cache')
existing = models.LibraryTrack.objects.filter(audio_file__in=files)
missing = set(files) - set(existing.values_list('audio_file', flat=True))
for m in missing:
storage.delete(m)
def get_files(storage, *parts):
"""
This is a recursive function that return all files available
in a given directory using django's storage.
"""
if not parts:
raise ValueError('Missing path')
dirs, files = storage.listdir(os.path.join(*parts))
for dir in dirs:
files += get_files(storage, *(list(parts) + [dir]))
return [
os.path.join(parts[-1], path)
for path in files
]

View File

@ -0,0 +1,18 @@
# Generated by Django 2.0.3 on 2018-05-06 12:47
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('music', '0025_auto_20180419_2023'),
]
operations = [
migrations.AddField(
model_name='trackfile',
name='accessed_date',
field=models.DateTimeField(blank=True, null=True),
),
]

View File

@ -415,6 +415,7 @@ class TrackFile(models.Model):
source = models.URLField(null=True, blank=True, max_length=500) source = models.URLField(null=True, blank=True, max_length=500)
creation_date = models.DateTimeField(default=timezone.now) creation_date = models.DateTimeField(default=timezone.now)
modification_date = models.DateTimeField(auto_now=True) modification_date = models.DateTimeField(auto_now=True)
accessed_date = models.DateTimeField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True) duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True) acoustid_track_id = models.UUIDField(null=True, blank=True)
mimetype = models.CharField(null=True, blank=True, max_length=200) mimetype = models.CharField(null=True, blank=True, max_length=200)

View File

@ -14,6 +14,7 @@ from django.db.models.functions import Length
from django.db.models import Count from django.db.models import Count
from django.http import StreamingHttpResponse from django.http import StreamingHttpResponse
from django.urls import reverse from django.urls import reverse
from django.utils import timezone
from django.utils.decorators import method_decorator from django.utils.decorators import method_decorator
from rest_framework import viewsets, views, mixins from rest_framework import viewsets, views, mixins
@ -264,6 +265,10 @@ class TrackFileViewSet(viewsets.ReadOnlyModelViewSet):
except models.TrackFile.DoesNotExist: except models.TrackFile.DoesNotExist:
return Response(status=404) return Response(status=404)
# we update the accessed_date
f.accessed_date = timezone.now()
f.save(update_fields=['accessed_date'])
mt = f.mimetype mt = f.mimetype
audio_file = f.audio_file audio_file = f.audio_file
try: try:

View File

@ -1,4 +1,7 @@
import datetime import datetime
import os
import pathlib
import pytest
from django.core.paginator import Paginator from django.core.paginator import Paginator
from django.utils import timezone from django.utils import timezone
@ -117,17 +120,16 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
lt1 = factories['federation.LibraryTrack'](with_audio_file=True) lt1 = factories['federation.LibraryTrack'](with_audio_file=True)
lt2 = factories['federation.LibraryTrack'](with_audio_file=True) lt2 = factories['federation.LibraryTrack'](with_audio_file=True)
lt3 = factories['federation.LibraryTrack'](with_audio_file=True) lt3 = factories['federation.LibraryTrack'](with_audio_file=True)
tf1 = factories['music.TrackFile'](library_track=lt1) tf1 = factories['music.TrackFile'](
tf2 = factories['music.TrackFile'](library_track=lt2) accessed_date=timezone.now(), library_track=lt1)
tf3 = factories['music.TrackFile'](library_track=lt3) tf2 = factories['music.TrackFile'](
accessed_date=timezone.now()-datetime.timedelta(minutes=61),
# we listen to the first one, and the second one (but weeks ago) library_track=lt2)
listening1 = factories['history.Listening']( tf3 = factories['music.TrackFile'](
track=tf1.track, accessed_date=None, library_track=lt3)
creation_date=timezone.now()) path1 = lt1.audio_file.path
listening2 = factories['history.Listening']( path2 = lt2.audio_file.path
track=tf2.track, path3 = lt3.audio_file.path
creation_date=timezone.now() - datetime.timedelta(minutes=61))
tasks.clean_music_cache() tasks.clean_music_cache()
@ -138,3 +140,32 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
assert bool(lt1.audio_file) is True assert bool(lt1.audio_file) is True
assert bool(lt2.audio_file) is False assert bool(lt2.audio_file) is False
assert bool(lt3.audio_file) is False assert bool(lt3.audio_file) is False
assert os.path.exists(path1) is True
assert os.path.exists(path2) is False
assert os.path.exists(path3) is False
def test_clean_federation_music_cache_orphaned(
settings, preferences, factories):
preferences['federation__music_cache_duration'] = 60
path = os.path.join(settings.MEDIA_ROOT, 'federation_cache')
keep_path = os.path.join(os.path.join(path, '1a', 'b2'), 'keep.ogg')
remove_path = os.path.join(os.path.join(path, 'c3', 'd4'), 'remove.ogg')
os.makedirs(os.path.dirname(keep_path), exist_ok=True)
os.makedirs(os.path.dirname(remove_path), exist_ok=True)
pathlib.Path(keep_path).touch()
pathlib.Path(remove_path).touch()
lt = factories['federation.LibraryTrack'](
with_audio_file=True,
audio_file__path=keep_path)
tf = factories['music.TrackFile'](
library_track=lt,
accessed_date=timezone.now())
tasks.clean_music_cache()
lt.refresh_from_db()
assert bool(lt.audio_file) is True
assert os.path.exists(lt.audio_file.path) is True
assert os.path.exists(remove_path) is False

View File

@ -2,6 +2,7 @@ import io
import pytest import pytest
from django.urls import reverse from django.urls import reverse
from django.utils import timezone
from funkwhale_api.music import views from funkwhale_api.music import views
from funkwhale_api.federation import actors from funkwhale_api.federation import actors
@ -149,6 +150,19 @@ def test_can_proxy_remote_track(
assert library_track.audio_file.read() == b'test' assert library_track.audio_file.read() == b'test'
def test_serve_updates_access_date(factories, settings, api_client):
settings.PROTECT_AUDIO_FILES = False
track_file = factories['music.TrackFile']()
now = timezone.now()
assert track_file.accessed_date is None
response = api_client.get(track_file.path)
track_file.refresh_from_db()
assert response.status_code == 200
assert track_file.accessed_date > now
def test_can_create_import_from_federation_tracks( def test_can_create_import_from_federation_tracks(
factories, superuser_api_client, mocker): factories, superuser_api_client, mocker):
lts = factories['federation.LibraryTrack'].create_batch(size=5) lts = factories['federation.LibraryTrack'].create_batch(size=5)

View File

@ -0,0 +1 @@
Federation cache suppression is now simpler and also deletes orphaned files (#189)