From 077a17b093d9a6331e14e5ef4b2234fd268df6f6 Mon Sep 17 00:00:00 2001
From: Eliot Berriot
Date: Thu, 12 Apr 2018 18:41:43 +0200
Subject: [PATCH] Scan task can now handle until

---
 api/funkwhale_api/federation/tasks.py | 27 ++++++++++++---
 api/tests/federation/test_tasks.py    | 49 +++++++++++++++++++++++++--
 2 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/api/funkwhale_api/federation/tasks.py b/api/funkwhale_api/federation/tasks.py
index fd03d3290..03124d16b 100644
--- a/api/funkwhale_api/federation/tasks.py
+++ b/api/funkwhale_api/federation/tasks.py
@@ -1,27 +1,44 @@
+from requests.exceptions import RequestException
+
 from funkwhale_api.taskapp import celery
 
 from . import library as lb
 from . import models
 
 
-@celery.app.task(name='federation.scan_library')
+@celery.app.task(
+    name='federation.scan_library',
+    autoretry_for=[RequestException],
+    retry_backoff=30,
+    max_retries=5)
 @celery.require_instance(models.Library, 'library')
-def scan_library(library):
+def scan_library(library, until=None):
     if not library.federation_enabled:
         return
 
     data = lb.get_library_data(library.url)
     scan_library_page.delay(
-        library_id=library.id, page_url=data['first'])
+        library_id=library.id, page_url=data['first'], until=until)
 
 
-@celery.app.task(name='federation.scan_library_page')
+@celery.app.task(
+    name='federation.scan_library_page',
+    autoretry_for=[RequestException],
+    retry_backoff=30,
+    max_retries=5)
 @celery.require_instance(models.Library, 'library')
-def scan_library_page(library, page_url):
+def scan_library_page(library, page_url, until=None):
     if not library.federation_enabled:
         return
 
     data = lb.get_library_page(library, page_url)
     lts = []
     for item_serializer in data['items']:
+        item_date = item_serializer.validated_data['published']
+        if until and item_date < until:
+            return
         lts.append(item_serializer.save())
+
+    next_page = data.get('next')
+    if next_page and next_page != page_url:
+        scan_library_page.delay(library_id=library.id, page_url=next_page)
diff --git a/api/tests/federation/test_tasks.py b/api/tests/federation/test_tasks.py
index 85684c1a3..80f2365b6 100644
--- a/api/tests/federation/test_tasks.py
+++ b/api/tests/federation/test_tasks.py
@@ -37,6 +37,7 @@ def test_scan_library_fetches_page_and_calls_scan_page(
     scan_page.assert_called_once_with(
         library_id=library.id,
         page_url=collection.data['first'],
+        until=None,
     )
 
 
@@ -51,11 +52,55 @@ def test_scan_page_fetches_page_and_creates_tracks(
         'item_serializer': serializers.AudioSerializer,
     }
     page = serializers.CollectionPageSerializer(page_conf)
-    #scan_page = mocker.patch(
-    #    'funkwhale_api.federation.tasks.scan_library_page.delay')
     r_mock.get(page.data['id'], json=page.data)
 
     tasks.scan_library_page(library_id=library.pk, page_url=page.data['id'])
 
     lts = list(library.tracks.all().order_by('-published_date'))
     assert len(lts) == 5
+
+
+def test_scan_page_trigger_next_page_scan_skip_if_same(
+        mocker, factories, r_mock):
+    patched_scan = mocker.patch(
+        'funkwhale_api.federation.tasks.scan_library_page.delay'
+    )
+    library = factories['federation.Library'](federation_enabled=True)
+    tfs = factories['music.TrackFile'].create_batch(size=1)
+    page_conf = {
+        'actor': library.actor,
+        'id': library.url,
+        'page': Paginator(tfs, 3).page(1),
+        'item_serializer': serializers.AudioSerializer,
+    }
+    page = serializers.CollectionPageSerializer(page_conf)
+    data = page.data
+    data['next'] = data['id']
+    r_mock.get(page.data['id'], json=data)
+
+    tasks.scan_library_page(library_id=library.pk, page_url=data['id'])
+    patched_scan.assert_not_called()
+
+
+def test_scan_page_stops_once_until_is_reached(
+        mocker, factories, r_mock):
+    library = factories['federation.Library'](federation_enabled=True)
+    tfs = list(reversed(factories['music.TrackFile'].create_batch(size=5)))
+    page_conf = {
+        'actor': library.actor,
+        'id': library.url,
+        'page': Paginator(tfs, 3).page(1),
+        'item_serializer': serializers.AudioSerializer,
+    }
+    page = serializers.CollectionPageSerializer(page_conf)
+    r_mock.get(page.data['id'], json=page.data)
+
+    tasks.scan_library_page(
+        library_id=library.pk,
+        page_url=page.data['id'],
+        until=tfs[1].creation_date)
+
+    lts = list(library.tracks.all().order_by('-published_date'))
+    assert len(lts) == 2
+    for i, tf in enumerate(tfs[:1]):
+        assert tf.creation_date == lts[i].published_date
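
Note (not part of the patch): a minimal sketch of how a caller might use the new until argument, assuming the celery.require_instance decorator resolves the library_id keyword to a Library instance as the tests above do; the helper name and the seven-day cutoff are hypothetical.

import datetime

from django.utils import timezone

from funkwhale_api.federation import tasks


def rescan_recent_items(library_id):
    # Hypothetical helper, for illustration only: items whose published
    # date is older than the cutoff make scan_library_page return early,
    # so only roughly the last week of activity is imported.
    cutoff = timezone.now() - datetime.timedelta(days=7)
    tasks.scan_library.delay(library_id=library_id, until=cutoff)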