Merge branch '189-cache-deletion' into 'develop'

Resolve "Federation cached tracks not cleaned by celery task"

Closes #189

See merge request funkwhale/funkwhale!183
This commit is contained in:
Eliot Berriot 2018-05-06 13:49:50 +00:00
commit 72c3c0fd85
7 changed files with 115 additions and 21 deletions

View File

@ -1,8 +1,10 @@
import datetime
import json
import logging
import os
from django.conf import settings
from django.db.models import Q
from django.utils import timezone
from requests.exceptions import RequestException
@ -96,16 +98,38 @@ def clean_music_cache():
delay = preferences['federation__music_cache_duration']
if delay < 1:
return # cache clearing disabled
limit = timezone.now() - datetime.timedelta(minutes=delay)
candidates = models.LibraryTrack.objects.filter(
audio_file__isnull=False
).values_list('local_track_file__track', flat=True)
listenings = Listening.objects.filter(
creation_date__gte=timezone.now() - datetime.timedelta(minutes=delay),
track__pk__in=candidates).values_list('track', flat=True)
too_old = set(candidates) - set(listenings)
to_remove = models.LibraryTrack.objects.filter(
local_track_file__track__pk__in=too_old).only('audio_file')
for lt in to_remove:
Q(audio_file__isnull=False) & (
Q(local_track_file__accessed_date__lt=limit) |
Q(local_track_file__accessed_date=None)
)
).exclude(audio_file='').only('audio_file', 'id')
for lt in candidates:
lt.audio_file.delete()
# we also delete orphaned files, if any
storage = models.LibraryTrack._meta.get_field('audio_file').storage
files = get_files(storage, 'federation_cache')
existing = models.LibraryTrack.objects.filter(audio_file__in=files)
missing = set(files) - set(existing.values_list('audio_file', flat=True))
for m in missing:
storage.delete(m)
def get_files(storage, *parts):
"""
This is a recursive function that return all files available
in a given directory using django's storage.
"""
if not parts:
raise ValueError('Missing path')
dirs, files = storage.listdir(os.path.join(*parts))
for dir in dirs:
files += get_files(storage, *(list(parts) + [dir]))
return [
os.path.join(parts[-1], path)
for path in files
]

View File

@ -0,0 +1,18 @@
# Generated by Django 2.0.3 on 2018-05-06 12:47
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('music', '0025_auto_20180419_2023'),
]
operations = [
migrations.AddField(
model_name='trackfile',
name='accessed_date',
field=models.DateTimeField(blank=True, null=True),
),
]

View File

@ -415,6 +415,7 @@ class TrackFile(models.Model):
source = models.URLField(null=True, blank=True, max_length=500)
creation_date = models.DateTimeField(default=timezone.now)
modification_date = models.DateTimeField(auto_now=True)
accessed_date = models.DateTimeField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
mimetype = models.CharField(null=True, blank=True, max_length=200)

View File

@ -14,6 +14,7 @@ from django.db.models.functions import Length
from django.db.models import Count
from django.http import StreamingHttpResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.decorators import method_decorator
from rest_framework import viewsets, views, mixins
@ -264,6 +265,10 @@ class TrackFileViewSet(viewsets.ReadOnlyModelViewSet):
except models.TrackFile.DoesNotExist:
return Response(status=404)
# we update the accessed_date
f.accessed_date = timezone.now()
f.save(update_fields=['accessed_date'])
mt = f.mimetype
audio_file = f.audio_file
try:

View File

@ -1,4 +1,7 @@
import datetime
import os
import pathlib
import pytest
from django.core.paginator import Paginator
from django.utils import timezone
@ -117,17 +120,16 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
lt1 = factories['federation.LibraryTrack'](with_audio_file=True)
lt2 = factories['federation.LibraryTrack'](with_audio_file=True)
lt3 = factories['federation.LibraryTrack'](with_audio_file=True)
tf1 = factories['music.TrackFile'](library_track=lt1)
tf2 = factories['music.TrackFile'](library_track=lt2)
tf3 = factories['music.TrackFile'](library_track=lt3)
# we listen to the first one, and the second one (but weeks ago)
listening1 = factories['history.Listening'](
track=tf1.track,
creation_date=timezone.now())
listening2 = factories['history.Listening'](
track=tf2.track,
creation_date=timezone.now() - datetime.timedelta(minutes=61))
tf1 = factories['music.TrackFile'](
accessed_date=timezone.now(), library_track=lt1)
tf2 = factories['music.TrackFile'](
accessed_date=timezone.now()-datetime.timedelta(minutes=61),
library_track=lt2)
tf3 = factories['music.TrackFile'](
accessed_date=None, library_track=lt3)
path1 = lt1.audio_file.path
path2 = lt2.audio_file.path
path3 = lt3.audio_file.path
tasks.clean_music_cache()
@ -138,3 +140,32 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
assert bool(lt1.audio_file) is True
assert bool(lt2.audio_file) is False
assert bool(lt3.audio_file) is False
assert os.path.exists(path1) is True
assert os.path.exists(path2) is False
assert os.path.exists(path3) is False
def test_clean_federation_music_cache_orphaned(
settings, preferences, factories):
preferences['federation__music_cache_duration'] = 60
path = os.path.join(settings.MEDIA_ROOT, 'federation_cache')
keep_path = os.path.join(os.path.join(path, '1a', 'b2'), 'keep.ogg')
remove_path = os.path.join(os.path.join(path, 'c3', 'd4'), 'remove.ogg')
os.makedirs(os.path.dirname(keep_path), exist_ok=True)
os.makedirs(os.path.dirname(remove_path), exist_ok=True)
pathlib.Path(keep_path).touch()
pathlib.Path(remove_path).touch()
lt = factories['federation.LibraryTrack'](
with_audio_file=True,
audio_file__path=keep_path)
tf = factories['music.TrackFile'](
library_track=lt,
accessed_date=timezone.now())
tasks.clean_music_cache()
lt.refresh_from_db()
assert bool(lt.audio_file) is True
assert os.path.exists(lt.audio_file.path) is True
assert os.path.exists(remove_path) is False

View File

@ -2,6 +2,7 @@ import io
import pytest
from django.urls import reverse
from django.utils import timezone
from funkwhale_api.music import views
from funkwhale_api.federation import actors
@ -149,6 +150,19 @@ def test_can_proxy_remote_track(
assert library_track.audio_file.read() == b'test'
def test_serve_updates_access_date(factories, settings, api_client):
settings.PROTECT_AUDIO_FILES = False
track_file = factories['music.TrackFile']()
now = timezone.now()
assert track_file.accessed_date is None
response = api_client.get(track_file.path)
track_file.refresh_from_db()
assert response.status_code == 200
assert track_file.accessed_date > now
def test_can_create_import_from_federation_tracks(
factories, superuser_api_client, mocker):
lts = factories['federation.LibraryTrack'].create_batch(size=5)

View File

@ -0,0 +1 @@
Federation cache suppression is now simpler and also deletes orphaned files (#189)