Fix third party upload triggers and plugin example (#2405)

This commit is contained in:
petitminion 2025-05-07 23:17:43 +00:00
parent 4549dcb61e
commit fc11c50275
10 changed files with 111 additions and 16 deletions

View File

@@ -1535,3 +1535,10 @@ Typesense hostname. Defaults to `localhost` on non-Docker deployments and to `ty
Docker deployments.
"""
TYPESENSE_NUM_TYPO = env("TYPESENSE_NUM_TYPO", default=5)
"""
Maximum number of tracks to download when the THIRD_PARTY_UPLOAD plugin hook is triggered.
Each API request for playlist tracks or radio tracks triggers the hook for tracks that are missing uploads.
On a large instance, your IP might get rate limited by the third party service.
"""
THIRD_PARTY_UPLOAD_MAX_UPLOADS = env.int("THIRD_PARTY_UPLOAD_MAX_UPLOADS", default=10)

View File

@@ -154,4 +154,4 @@ REST_FRAMEWORK.update(
)
# allows makemigrations and superuser creation
-FORCE = env("FORCE", default=1)
+FORCE = env("FORCE", default=True)

View File

@@ -3,7 +3,9 @@ import hashlib
import logging
import os
import tempfile
import time
import urllib.parse
from datetime import timedelta
import requests
from django.core.files import File
@@ -16,6 +18,41 @@ from funkwhale_api.taskapp import celery
logger = logging.getLogger(__name__)
class TooManyQueriesError(Exception):
pass
def check_existing_download_task(track):
if models.Upload.objects.filter(
track=track,
import_status__in=["pending", "finished"],
third_party_provider="archive-dl",
).exists():
raise TooManyQueriesError(
"Upload for this track already exist or is pending. Stopping task."
)
def check_last_third_party_queries(track, count):
# 15 queries per minute according to their documentation = roughly one every 4 seconds
time_threshold = timezone.now() - timedelta(seconds=5)
if models.Upload.objects.filter(
third_party_provider="archive-dl",
import_status__in=["pending", "finished"],
creation_date__gte=time_threshold,
).exists():
logger.info(
"Last archive.org query was too recent. Trying to wait 2 seconds..."
)
time.sleep(2)
count += 1
if count > 3:
raise TooManyQueriesError(
"Probably too many archivedl tasks are queue, stopping this task"
)
check_last_third_party_queries(track, count)
def create_upload(url, track, files_data):
mimetype = f"audio/{files_data.get('format', 'unknown')}"
duration = files_data.get("mtime", 0)
@@ -38,13 +75,19 @@ def create_upload(url, track, files_data):
bitrate=bitrate,
library=service_library,
from_activity=None,
-import_status="finished",
+import_status="pending",
)
@celery.app.task(name="archivedl.archive_download")
@celery.require_instance(models.Track.objects.select_related(), "track")
def archive_download(track, conf):
try:
check_existing_download_task(track)
check_last_third_party_queries(track, 0)
except TooManyQueriesError as e:
logger.error(e)
return
artist_name = utils.get_artist_credit_string(track)
query = f"mediatype:audio AND title:{track.title} AND creator:{artist_name}"
with requests.Session() as session:
@@ -145,4 +188,5 @@ def filter_files(files, allowed_extensions):
def get_search_url(query, page_size, page):
q = urllib.parse.urlencode({"q": query})
-return f"https://archive.org/advancedsearch.php?{q}&sort[]=addeddate+desc&rows={page_size}&page={page}&output=json"
+return f"https://archive.org/advancedsearch.php?{q}&sort[]=addeddate+desc&rows={page_size}\
+&page={page}&output=json&mediatype=audio"

View File

@@ -25,7 +25,6 @@ from django.dispatch import receiver
from django.urls import reverse
from django.utils import timezone
-from config import plugins
from funkwhale_api import musicbrainz
from funkwhale_api.common import fields
from funkwhale_api.common import models as common_models
@@ -524,19 +523,10 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
def with_playable_uploads(self, actor):
uploads = Upload.objects.playable_by(actor)
-queryset = self.prefetch_related(
+return self.prefetch_related(
models.Prefetch("uploads", queryset=uploads, to_attr="playable_uploads")
)
-if queryset and queryset[0].uploads.count() > 0:
-return queryset
-else:
-plugins.trigger_hook(
-plugins.TRIGGER_THIRD_PARTY_UPLOAD,
-track=self.first(),
-)
-return queryset
def order_for_album(self):
"""
Order by disc number then position

View File

@@ -1,6 +1,7 @@
import logging
from itertools import chain
from django.conf import settings
from django.db import transaction
from django.db.models import Count
from drf_spectacular.utils import extend_schema
@@ -10,6 +11,7 @@ from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response
from config import plugins
from funkwhale_api.common import fields, permissions
from funkwhale_api.federation import routes
from funkwhale_api.music import models as music_models
@@ -130,6 +132,12 @@ class PlaylistViewSet(
plts = playlist.playlist_tracks.all().for_nested_serialization(
music_utils.get_actor_from_request(request)
)
plts_without_upload = plts.filter(track__uploads__isnull=True)
for plt in plts_without_upload[: settings.THIRD_PARTY_UPLOAD_MAX_UPLOADS]:
plugins.trigger_hook(
plugins.TRIGGER_THIRD_PARTY_UPLOAD,
track=plt.track,
)
serializer = serializers.PlaylistTrackSerializer(plts, many=True)
data = {"count": len(plts), "results": serializer.data}
return Response(data, status=200)

View File

View File

@@ -0,0 +1,38 @@
import logging
import pytest
from funkwhale_api.contrib.archivedl import tasks
def test_check_existing_download_task(factories, caplog, mocker):
logger = logging.getLogger("funkwhale_api.contrib.archivedl")
caplog.set_level(logging.INFO)
logger.addHandler(caplog.handler)
upload = factories["music.Upload"](
third_party_provider="archive-dl", import_status="pending"
)
mocker.patch("funkwhale_api.contrib.archivedl.tasks.fetch_json", return_value={})
tasks.archive_download(track_id=upload.track.id, conf={})
assert (
"Upload for this track already exist or is pending. Stopping task"
in caplog.text
)
def test_check_last_third_party_queries(factories, caplog, mocker):
logger = logging.getLogger("funkwhale_api.contrib.archivedl")
caplog.set_level(logging.INFO)
logger.addHandler(caplog.handler)
factories["music.Upload"].create_batch(
size=10, third_party_provider="archive-dl", import_status="pending"
)
track = factories["music.Track"]()
mocker.patch("funkwhale_api.contrib.archivedl.tasks.fetch_json", return_value={})
with pytest.raises(KeyError):
tasks.archive_download(track_id=track.id, conf={})
assert (
"Last archive.org query was too recent. Trying to wait 2 seconds..."
in caplog.text
)

View File

@@ -0,0 +1 @@
Fix third party upload triggers and plugin example (#2405)

View File

@@ -38,7 +38,7 @@ services:
command: >
sh -c '
pip install watchdog[watchmedo] &&
-watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY}
+watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY:-0}
'
depends_on:
api:

View File

@@ -10,7 +10,9 @@ As an admin I can add plugins that support downloading tracks from third party
## Backend
-When a track queryset is called with `with_playable_uploads` if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
+When a radio or playlist queryset is called, if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
RadioViewSet.tracks and PlaylistViewSet.tracks are concerned. Since these endpoints can be called frequently, the `THIRD_PARTY_UPLOAD_MAX_UPLOADS` setting limits the number of requests sent to the third party service (see the sketch below).
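For illustration, a condensed sketch of the pattern this commit adds to `PlaylistViewSet.tracks`, wrapped here in a hypothetical helper for readability (the radio endpoint is assumed to follow the same shape):

```python
from django.conf import settings

from config import plugins


def trigger_missing_uploads(plts):
    """Trigger the third party upload hook for tracks that have no upload yet.

    `plts` is the playlist-tracks (or radio candidates) queryset built by the view.
    """
    plts_without_upload = plts.filter(track__uploads__isnull=True)
    # Cap the number of hook invocations per request so a large playlist does not
    # hammer the third party service.
    for plt in plts_without_upload[: settings.THIRD_PARTY_UPLOAD_MAX_UPLOADS]:
        plugins.trigger_hook(
            plugins.TRIGGER_THIRD_PARTY_UPLOAD,
            track=plt.track,
        )
```

Each hook invocation is expected to hand the actual download off to Celery, so the request serving the playlist or radio stays fast.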
`handle_stream` should filter the upload queryset so that manual uploads are served before plugin-created uploads.
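One possible way to express that priority (an illustration, not the shipped implementation), assuming manual uploads have no `third_party_provider` set:

```python
from django.db.models import F


def order_uploads_for_stream(uploads):
    # Manual uploads (NULL provider) sort first, plugin-created uploads after,
    # newest first within each group.
    return uploads.order_by(
        F("third_party_provider").asc(nulls_first=True),
        "-creation_date",
    )
```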
@@ -21,9 +23,12 @@ Plugins registering `TRIGGER_THIRD_PARTY_UPLOAD` should :
- trigger a celery task. Otherwise the queryset will take a long time to complete.
- create an upload with an associated file
- delete the upload if no file is successfully downloaded
- check if an upload has already been triggered to avoid overloading Celery
An example can be found in `funkwhale_api.contrib.archivedl`; a minimal sketch of the hook registration is shown below.
To enable the archive-dl plugin: `FUNKWHALE_PLUGINS=funkwhale_api.contrib.archivedl`
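A minimal sketch of the hook registration, modeled on the archivedl plugin; the `PLUGIN` config object and module layout follow the pattern used by the other contrib plugins and should be treated as assumptions rather than an exact copy:

```python
from config import plugins

from . import tasks
from .funkwhale_startup import PLUGIN  # plugin config declared at startup (assumed layout)


@plugins.register_hook(plugins.TRIGGER_THIRD_PARTY_UPLOAD, PLUGIN)
def launch_third_party_download(track, conf, **kwargs):
    # Hand the download off to Celery so the playlist/radio request returns quickly;
    # the task itself is responsible for deduplication and rate limiting.
    tasks.archive_download.delay(track_id=track.pk, conf=conf)
```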
## Follow up
-The frontend should update the track object if `TRIGGER_THIRD_PARTY_UPLOAD` -The frontend should update the track object if `TRIGGER_THIRD_PARTY_UPLOAD`
@@ -32,3 +37,5 @@ An example can be found in `funkwhale_api.contrib.archivedl`
- trigger a channels group send so the frontend can update track qs when/if the upload is ready
- Third party track stream (do not download the file, only pass a stream)
- Allow `THIRD_PARTY_UPLOAD_MAX_UPLOADS` to be set at the plugin level -> allow admin to set plugin conf in ui -> create PluginAdminViewSet