diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 776d73c2e..e93d01018 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -1534,3 +1534,10 @@ Typesense hostname. Defaults to `localhost` on non-Docker deployments and to `ty
 Docker deployments.
 """
 TYPESENSE_NUM_TYPO = env("TYPESENSE_NUM_TYPO", default=5)
+
+"""
+Max tracks to be downloaded when the THIRD_PARTY_UPLOAD plugin hook is triggered.
+Each API request to playlist tracks or radio tracks triggers the hook if track uploads are missing.
+If your instance is big, your IP might get rate limited.
+"""
+THIRD_PARTY_UPLOAD_MAX_UPLOADS = env.int("THIRD_PARTY_UPLOAD_MAX_UPLOADS", default=10)
diff --git a/api/config/settings/local.py b/api/config/settings/local.py
index b1674f46d..bdcc42079 100644
--- a/api/config/settings/local.py
+++ b/api/config/settings/local.py
@@ -154,4 +154,4 @@ REST_FRAMEWORK.update(
 )

 # allows makemigrations and superuser creation
-FORCE = env("FORCE", default=1)
+FORCE = env("FORCE", default=True)
diff --git a/api/funkwhale_api/contrib/archivedl/tasks.py b/api/funkwhale_api/contrib/archivedl/tasks.py
index 3a1b1cb80..5676d5bb5 100644
--- a/api/funkwhale_api/contrib/archivedl/tasks.py
+++ b/api/funkwhale_api/contrib/archivedl/tasks.py
@@ -3,7 +3,9 @@ import hashlib
 import logging
 import os
 import tempfile
+import time
 import urllib.parse
+from datetime import timedelta

 import requests
 from django.core.files import File
@@ -16,6 +18,53 @@ from funkwhale_api.taskapp import celery
 logger = logging.getLogger(__name__)


+def check_existing_download_task(track):
+    """
+    Return True when an upload for ``track`` is already pending or finished.
+
+    Callers should stop their task when this returns True.
+    """
+    if models.Upload.objects.filter(
+        track=track,
+        import_status__in=["pending", "finished"],
+    ).exists():
+        logger.info("Upload for this track already exist or is pending. Stopping task.")
+        return True
+    return False
+
+
+def check_last_third_party_queries(track, count):
+    """
+    Wait while archive.org was queried too recently; return True to give up.
+
+    Sleeps 10 seconds each time a pending or finished "archive-dl" upload of
+    ``track`` was created in the last 5 seconds, retrying until ``count``
+    exceeds 3.
+    """
+    # 15 per minutes according to their doc = one each 4 seconds
+    # NOTE(review): filtering on ``track`` throttles per track, not per
+    # provider - confirm this is the intended rate limit scope.
+    time_threshold = timezone.now() - timedelta(seconds=5)
+    if models.Upload.objects.filter(
+        track=track,
+        third_party_provider="archive-dl",
+        import_status__in=["pending", "finished"],
+        creation_date__gte=time_threshold,
+    ).exists():
+        logger.info(
+            "Last archive.org query was too recent. Trying to wait 10 seconds..."
+        )
+        time.sleep(10)
+        count += 1
+        if count > 3:
+            logger.info(
+                "Probably too many archivedl tasks are queue, stopping this task"
+            )
+            return True
+        return check_last_third_party_queries(track, count)
+    return False
+
+
 def create_upload(url, track, files_data):
     mimetype = f"audio/{files_data.get('format', 'unknown')}"
     duration = files_data.get("mtime", 0)
@@ -38,13 +87,16 @@ def create_upload(url, track, files_data):
         bitrate=bitrate,
         library=service_library,
         from_activity=None,
-        import_status="finished",
+        import_status="pending",
     )


 @celery.app.task(name="archivedl.archive_download")
 @celery.require_instance(models.Track.objects.select_related(), "track")
 def archive_download(track, conf):
+    # Both guards return True when this task should not run.
+    if check_existing_download_task(track) or check_last_third_party_queries(track, 0):
+        return
     artist_name = utils.get_artist_credit_string(track)
     query = f"mediatype:audio AND title:{track.title} AND creator:{artist_name}"
     with requests.Session() as session:
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index fe1e2b0c9..abff1a7cf 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -24,7 +24,6 @@ from django.dispatch import receiver
 from django.urls import reverse
 from django.utils import timezone

-from config import plugins
 from funkwhale_api import musicbrainz
 from funkwhale_api.common import fields
 from funkwhale_api.common import models as common_models
@@ -523,19 +522,10 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):

     def with_playable_uploads(self, actor):
         uploads = Upload.objects.playable_by(actor)
-        queryset = self.prefetch_related(
+        return self.prefetch_related(
             models.Prefetch("uploads", queryset=uploads, to_attr="playable_uploads")
         )

-        if queryset and queryset[0].uploads.count() > 0:
-            return queryset
-        else:
-            plugins.trigger_hook(
-                plugins.TRIGGER_THIRD_PARTY_UPLOAD,
-                track=self.first(),
-            )
-            return queryset
-
     def order_for_album(self):
         """
         Order by disc number then position
diff --git a/api/funkwhale_api/playlists/views.py b/api/funkwhale_api/playlists/views.py
index 98872f601..9cb1c2034 100644
--- a/api/funkwhale_api/playlists/views.py
+++ b/api/funkwhale_api/playlists/views.py
@@ -1,5 +1,6 @@
 import logging

+from django.conf import settings
 from django.db import transaction
 from django.db.models import Count
 from drf_spectacular.utils import extend_schema
@@ -9,6 +10,7 @@ from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
 from rest_framework.renderers import JSONRenderer
 from rest_framework.response import Response

+from config import plugins
 from funkwhale_api.common import fields, permissions
 from funkwhale_api.federation import routes
 from funkwhale_api.music import models as music_models
@@ -128,6 +130,13 @@ class PlaylistViewSet(
         plts = playlist.playlist_tracks.all().for_nested_serialization(
             music_utils.get_actor_from_request(request)
         )
+        # Only playlist tracks whose track has no upload need the third party hook.
+        plts_without_upload = plts.filter(track__uploads__isnull=True)
+        for plt in plts_without_upload[: settings.THIRD_PARTY_UPLOAD_MAX_UPLOADS]:
+            plugins.trigger_hook(
+                plugins.TRIGGER_THIRD_PARTY_UPLOAD,
+                track=plt.track,
+            )
         serializer = serializers.PlaylistTrackSerializer(plts, many=True)
         data = {"count": len(plts), "results": serializer.data}
         return Response(data, status=200)
diff --git a/api/funkwhale_api/radios/views.py b/api/funkwhale_api/radios/views.py
index 5799121a0..cb1be5cfe 100644
--- a/api/funkwhale_api/radios/views.py
+++ b/api/funkwhale_api/radios/views.py
@@ -1,5 +1,6 @@
 import pickle

+from django.conf import settings
 from django.contrib.auth.models import AnonymousUser
 from django.core.cache import cache
 from django.db.models import Q
@@ -8,6 +9,7 @@ from rest_framework import mixins, status, viewsets
 from rest_framework.decorators import action
 from rest_framework.response import Response

+from config import plugins
 from funkwhale_api.common import permissions as common_permissions
 from funkwhale_api.music import utils as music_utils
 from funkwhale_api.music.serializers import TrackSerializer
@@ -51,9 +53,15 @@ class RadioViewSet(
     @action(methods=["get"], detail=True, serializer_class=TrackSerializer)
     def tracks(self, request, *args, **kwargs):
         radio = self.get_object()
-        tracks = radio.get_candidates().for_nested_serialization()
+        tracks = radio.get_candidates()
+        tracks_without_upload = tracks.filter(uploads__isnull=True)
         actor = music_utils.get_actor_from_request(self.request)
         tracks = tracks.with_playable_uploads(actor)
+        for track in tracks_without_upload[: settings.THIRD_PARTY_UPLOAD_MAX_UPLOADS]:
+            plugins.trigger_hook(
+                plugins.TRIGGER_THIRD_PARTY_UPLOAD,
+                track=track,
+            )
         tracks = tracks.playable_by(actor)
         page = self.paginate_queryset(tracks)
         if page is not None:
diff --git a/changes/changelog.d/2405.bugfix b/changes/changelog.d/2405.bugfix
new file mode 100644
index 000000000..3dfb85792
--- /dev/null
+++ b/changes/changelog.d/2405.bugfix
@@ -0,0 +1 @@
+Fix third party upload triggers and plugin example (#2405)
diff --git a/compose/app.django.yml b/compose/app.django.yml
index 2b2b80f0e..e9d49bcd0 100644
--- a/compose/app.django.yml
+++ b/compose/app.django.yml
@@ -42,7 +42,7 @@ services:
     command: >
       sh -c '
       pip install watchdog[watchmedo] &&
-      watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY}
+      watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY:-0}
       '
     depends_on:
       api:
diff --git a/docs/specs/third-party-tracks/index.md b/docs/specs/third-party-tracks/index.md
index 36fb7260e..037c3ea16 100644
--- a/docs/specs/third-party-tracks/index.md
+++ b/docs/specs/third-party-tracks/index.md
@@ -10,7 +10,9 @@ Has an admin I can add plugins that support downloading tracks from third party

 ## Backend

-When a track queryset is called with `with_playable_uploads` if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
+When a radio or playlist queryset is called, if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
+
+`RadioViewSet.tracks` and `PlaylistViewSet.tracks` are concerned. These endpoints can be called a lot; the `THIRD_PARTY_UPLOAD_MAX_UPLOADS` setting limits the number of requests that are sent to the third party service.

 `handle_stream` should filter the upload queryset to display manual upload before plugin upload

@@ -21,9 +23,12 @@ Plugins registering `TRIGGER_THIRD_PARTY_UPLOAD` should :
 - trigger celery task. If not the queryset will take a long time to complete.
 - create an upload with an associated file
 - delete the upload if no file is succefully downloaded
+- check if an upload has already been triggered to avoid overloading Celery

 An example can be found in `funkwhale_api.contrib.archivedl`

+To enable the archive-dl plugin: `FUNKWHALE_PLUGINS=funkwhale_api.contrib.archivedl`
+
 ## Follow up

 -The frontend should update the track object if `TRIGGER_THIRD_PARTY_UPLOAD`
@@ -32,3 +37,5 @@ An example can be found in `funkwhale_api.contrib.archivedl`
 - trigger a channels group send so the frontend can update track qs when/if the upload is ready

 - Third party track stream (do not download the file, only pass a stream)
+
+- Allow `THIRD_PARTY_UPLOAD_MAX_UPLOADS` to be set at the plugin level -> allow admin to set plugin conf in ui -> create PluginAdminViewSet