Fix third party upload triggers and plugin example (#2405)

This commit is contained in:
petitminion 2025-05-07 23:17:43 +00:00
parent 4549dcb61e
commit fc11c50275
10 changed files with 111 additions and 16 deletions

View File

@ -1535,3 +1535,10 @@ Typesense hostname. Defaults to `localhost` on non-Docker deployments and to `ty
Docker deployments.
"""
TYPESENSE_NUM_TYPO = env("TYPESENSE_NUM_TYPO", default=5)
"""
Maximum number of tracks to be downloaded when the THIRD_PARTY_UPLOAD plugin hook is triggered.
Each API request to playlist tracks or radio tracks triggers the hook if track uploads are missing.
If your instance is big, your IP might get rate limited.
"""
THIRD_PARTY_UPLOAD_MAX_UPLOADS = env.int("THIRD_PARTY_UPLOAD_MAX_UPLOADS", default=10)

View File

@ -154,4 +154,4 @@ REST_FRAMEWORK.update(
)
# allows makemigrations and superuser creation
FORCE = env("FORCE", default=1)
FORCE = env("FORCE", default=True)

View File

@ -3,7 +3,9 @@ import hashlib
import logging
import os
import tempfile
import time
import urllib.parse
from datetime import timedelta
import requests
from django.core.files import File
@ -16,6 +18,41 @@ from funkwhale_api.taskapp import celery
logger = logging.getLogger(__name__)
class TooManyQueriesError(Exception):
    """Raised by the archive-dl checks to make a download task stop early."""
def check_existing_download_task(track):
    """Refuse to continue when ``track`` already has an archive-dl upload.

    Args:
        track: The track whose uploads are inspected.

    Raises:
        TooManyQueriesError: if an upload for ``track`` with the
            ``archive-dl`` provider exists with import status ``pending``
            or ``finished``.
    """
    already_handled = models.Upload.objects.filter(
        track=track,
        import_status__in=["pending", "finished"],
        third_party_provider="archive-dl",
    ).exists()
    if not already_handled:
        return
    raise TooManyQueriesError(
        "Upload for this track already exist or is pending. Stopping task."
    )
def check_last_third_party_queries(track, count):
    """Wait until no archive-dl upload was created in the last 5 seconds.

    While such an upload exists, log a message, sleep 2 seconds and check
    again.

    Args:
        track: Not used by the check itself.
        count: Number of waits already performed; callers start at 0.

    Raises:
        TooManyQueriesError: once ``count`` exceeds 3 after a wait.
    """
    while True:
        # archive.org documents 15 queries per minute, i.e. one every 4 seconds.
        window_start = timezone.now() - timedelta(seconds=5)
        recent_uploads = models.Upload.objects.filter(
            third_party_provider="archive-dl",
            import_status__in=["pending", "finished"],
            creation_date__gte=window_start,
        )
        if not recent_uploads.exists():
            return

        logger.info(
            "Last archive.org query was too recent. Trying to wait 2 seconds..."
        )
        time.sleep(2)
        count += 1
        if count > 3:
            raise TooManyQueriesError(
                "Probably too many archivedl tasks are queue, stopping this task"
            )
def create_upload(url, track, files_data):
mimetype = f"audio/{files_data.get('format', 'unknown')}"
duration = files_data.get("mtime", 0)
@ -38,13 +75,19 @@ def create_upload(url, track, files_data):
bitrate=bitrate,
library=service_library,
from_activity=None,
import_status="finished",
import_status="pending",
)
@celery.app.task(name="archivedl.archive_download")
@celery.require_instance(models.Track.objects.select_related(), "track")
def archive_download(track, conf):
try:
check_existing_download_task(track)
check_last_third_party_queries(track, 0)
except TooManyQueriesError as e:
logger.error(e)
return
artist_name = utils.get_artist_credit_string(track)
query = f"mediatype:audio AND title:{track.title} AND creator:{artist_name}"
with requests.Session() as session:
@ -145,4 +188,5 @@ def filter_files(files, allowed_extensions):
def get_search_url(query, page_size, page):
    """Build the archive.org advanced-search URL for ``query``.

    Args:
        query: Search expression; URL-encoded and sent as the ``q`` parameter.
        page_size: Value of the ``rows`` parameter.
        page: Value of the ``page`` parameter.

    Returns:
        The request URL as a single string with no embedded whitespace.
    """
    q = urllib.parse.urlencode({"q": query})
    # Adjacent literals are joined at compile time, so the URL can span
    # several source lines without a backslash continuation inside the
    # string, which would embed the next line's indentation in the URL.
    return (
        f"https://archive.org/advancedsearch.php?{q}"
        f"&sort[]=addeddate+desc&rows={page_size}"
        f"&page={page}&output=json&mediatype=audio"
    )

View File

@ -25,7 +25,6 @@ from django.dispatch import receiver
from django.urls import reverse
from django.utils import timezone
from config import plugins
from funkwhale_api import musicbrainz
from funkwhale_api.common import fields
from funkwhale_api.common import models as common_models
@ -524,19 +523,10 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
def with_playable_uploads(self, actor):
uploads = Upload.objects.playable_by(actor)
queryset = self.prefetch_related(
return self.prefetch_related(
models.Prefetch("uploads", queryset=uploads, to_attr="playable_uploads")
)
if queryset and queryset[0].uploads.count() > 0:
return queryset
else:
plugins.trigger_hook(
plugins.TRIGGER_THIRD_PARTY_UPLOAD,
track=self.first(),
)
return queryset
def order_for_album(self):
"""
Order by disc number then position

View File

@ -1,6 +1,7 @@
import logging
from itertools import chain
from django.conf import settings
from django.db import transaction
from django.db.models import Count
from drf_spectacular.utils import extend_schema
@ -10,6 +11,7 @@ from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response
from config import plugins
from funkwhale_api.common import fields, permissions
from funkwhale_api.federation import routes
from funkwhale_api.music import models as music_models
@ -130,6 +132,12 @@ class PlaylistViewSet(
plts = playlist.playlist_tracks.all().for_nested_serialization(
music_utils.get_actor_from_request(request)
)
plts_without_upload = plts.filter(track__uploads__isnull=True)
for plt in plts_without_upload[: settings.THIRD_PARTY_UPLOAD_MAX_UPLOADS]:
plugins.trigger_hook(
plugins.TRIGGER_THIRD_PARTY_UPLOAD,
track=plt.track,
)
serializer = serializers.PlaylistTrackSerializer(plts, many=True)
data = {"count": len(plts), "results": serializer.data}
return Response(data, status=200)

View File

View File

@ -0,0 +1,38 @@
import logging
import pytest
from funkwhale_api.contrib.archivedl import tasks
def test_check_existing_download_task(factories, caplog, mocker):
    """A pending archive-dl upload for the track makes the task log and stop."""
    archivedl_logger = logging.getLogger("funkwhale_api.contrib.archivedl")
    caplog.set_level(logging.INFO)
    archivedl_logger.addHandler(caplog.handler)

    pending_upload = factories["music.Upload"](
        third_party_provider="archive-dl", import_status="pending"
    )
    mocker.patch("funkwhale_api.contrib.archivedl.tasks.fetch_json", return_value={})

    tasks.archive_download(track_id=pending_upload.track.id, conf={})

    expected_message = (
        "Upload for this track already exist or is pending. Stopping task"
    )
    assert expected_message in caplog.text
def test_check_last_third_party_queries(factories, caplog, mocker):
    """Recent archive-dl uploads make the task log that it is waiting."""
    archivedl_logger = logging.getLogger("funkwhale_api.contrib.archivedl")
    caplog.set_level(logging.INFO)
    archivedl_logger.addHandler(caplog.handler)

    factories["music.Upload"].create_batch(
        size=10, third_party_provider="archive-dl", import_status="pending"
    )
    other_track = factories["music.Track"]()
    mocker.patch("funkwhale_api.contrib.archivedl.tasks.fetch_json", return_value={})

    # fetch_json is patched to return an empty dict, and the task is expected
    # to fail with KeyError once it gets past the rate-limit check.
    with pytest.raises(KeyError):
        tasks.archive_download(track_id=other_track.id, conf={})

    expected_message = (
        "Last archive.org query was too recent. Trying to wait 2 seconds..."
    )
    assert expected_message in caplog.text

View File

@ -0,0 +1 @@
Fix third party upload triggers and plugin example (#2405)

View File

@ -38,7 +38,7 @@ services:
command: >
sh -c '
pip install watchdog[watchmedo] &&
watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY}
watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY:-0}
'
depends_on:
api:

View File

@ -10,7 +10,9 @@ Has an admin I can add plugins that support downloading tracks from third party
## Backend
When a track queryset is called with `with_playable_uploads` if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
When a radio or playlist queryset is called if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
`RadioViewSet.tracks` and `PlaylistViewSet.tracks` are affected. These endpoints can be called a lot; the `THIRD_PARTY_UPLOAD_MAX_UPLOADS` variable limits the number of requests that are sent to the third-party service.
`handle_stream` should filter the upload queryset to display manual upload before plugin upload
@ -21,9 +23,12 @@ Plugins registering `TRIGGER_THIRD_PARTY_UPLOAD` should :
- trigger celery task. If not the queryset will take a long time to complete.
- create an upload with an associated file
- delete the upload if no file is successfully downloaded
- check if an upload has already been triggered to avoid overloading Celery
An example can be found in `funkwhale_api.contrib.archivedl`
To enable the archive-dl plugin : `FUNKWHALE_PLUGINS=funkwhale_api.contrib.archivedl`
## Follow up
-The frontend should update the track object if `TRIGGER_THIRD_PARTY_UPLOAD`
@ -32,3 +37,5 @@ An example can be found in `funkwhale_api.contrib.archivedl`
- trigger a channels group send so the frontend can update track qs when/if the upload is ready
- Third party track stream (do not download the file, only pass a stream)
- Allow `THIRD_PARTY_UPLOAD_MAX_UPLOADS` to be set at the plugin level -> allow admin to set plugin conf in ui -> create PluginAdminViewSet