diff --git a/api/config/plugins.py b/api/config/plugins.py
index cfdeabb3e..3603d652d 100644
--- a/api/config/plugins.py
+++ b/api/config/plugins.py
@@ -299,6 +299,10 @@ def background_task(name):
 
 
 # HOOKS
+TRIGGER_THIRD_PARTY_UPLOAD = "third_party_upload"
+"""
+Called when a track has no upload, so that plugins can fetch one from a third party
+"""
 LISTENING_CREATED = "listening_created"
 """
 Called when a track is being listened
diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index af1d8be0c..7ca178286 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -114,6 +114,7 @@ else:
             logger.info("Loaded env file at %s/.env", path)
             break
 
+FUNKWHALE_PLUGINS = env("FUNKWHALE_PLUGINS", default="")
 FUNKWHALE_PLUGINS_PATH = env(
     "FUNKWHALE_PLUGINS_PATH", default="/srv/funkwhale/plugins/"
 )
diff --git a/api/funkwhale_api/contrib/archivedl/funkwhale_ready.py b/api/funkwhale_api/contrib/archivedl/funkwhale_ready.py
new file mode 100644
index 000000000..38425f3ed
--- /dev/null
+++ b/api/funkwhale_api/contrib/archivedl/funkwhale_ready.py
@@ -0,0 +1,22 @@
+import logging
+
+from config import plugins
+from funkwhale_api.contrib.archivedl import tasks
+
+from .funkwhale_startup import PLUGIN
+
+logger = logging.getLogger(__name__)
+
+
+@plugins.register_hook(plugins.TRIGGER_THIRD_PARTY_UPLOAD, PLUGIN)
+def lauch_download(track, conf=None):
+    """
+    Queue an archive.org download for a track that has no upload.
+
+    Only the primary key of the track is sent to celery, the download itself
+    runs in the ``archivedl.archive_download`` task.
+    """
+    if track is None:
+        # the hook may be triggered from an empty track queryset
+        return
+    tasks.archive_download.delay(track_id=track.pk, conf=conf or {})
diff --git a/api/funkwhale_api/contrib/archivedl/funkwhale_startup.py b/api/funkwhale_api/contrib/archivedl/funkwhale_startup.py
new file mode 100644
index 000000000..ea1fa4db6
--- /dev/null
+++ b/api/funkwhale_api/contrib/archivedl/funkwhale_startup.py
@@ -0,0 +1,10 @@
+from config import plugins
+
+PLUGIN = plugins.get_plugin_config(
+    name="archivedl",
+    label="Archive-dl",
+    description="",
+    version="0.1",
+    user=False,
+    conf=[],
+)
diff --git a/api/funkwhale_api/contrib/archivedl/tasks.py b/api/funkwhale_api/contrib/archivedl/tasks.py
new file mode 100644
index
000000000..3a1b1cb80
--- /dev/null
+++ b/api/funkwhale_api/contrib/archivedl/tasks.py
@@ -0,0 +1,232 @@
+import hashlib
+import logging
+import os
+import tempfile
+import urllib.parse
+
+import requests
+from django.core.files import File
+from django.utils import timezone
+
+from funkwhale_api.federation import actors
+from funkwhale_api.music import models, utils
+from funkwhale_api.taskapp import celery
+
+logger = logging.getLogger(__name__)
+
+THIRD_PARTY_PROVIDER = "archive-dl"
+# seconds to wait for archive.org, so that a stalled request cannot block a
+# celery worker forever
+REQUEST_TIMEOUT = 30
+# size of the chunks used to stream files to disk and to hash them
+CHUNK_SIZE = 1024 * 1024
+
+
+def _to_int(value, default=0):
+    """Return ``value`` as an int, or ``default`` when it is not a number."""
+    try:
+        return int(float(value))
+    except (TypeError, ValueError):
+        return default
+
+
+def _parse_duration(value):
+    """Convert a ``length`` such as "215.43" or "3:35" to whole seconds."""
+    seconds = 0.0
+    try:
+        for part in str(value).split(":"):
+            seconds = seconds * 60 + float(part)
+    except ValueError:
+        return 0
+    return int(seconds)
+
+
+def _get_service_library():
+    """Return the public library of the service actor, creating it once."""
+    service_actor = actors.get_service_actor()
+    library = models.Library.objects.filter(
+        actor=service_actor, privacy_level="everyone"
+    ).first()
+    if library is None:
+        library = models.Library.objects.create(
+            actor=service_actor, privacy_level="everyone"
+        )
+    return library
+
+
+def create_upload(url, track, files_data):
+    """
+    Create a pending upload describing the archive.org file to download.
+
+    The upload is only marked as finished by ``download_file``, once the
+    audio file is stored, so it is never advertised without a file.
+    """
+    extension = os.path.splitext(files_data.get("name", ""))[-1][1:].lower()
+    mimetype = dict(utils.AUDIO_EXTENSIONS_AND_MIMETYPE).get(
+        extension, "audio/unknown"
+    )
+
+    return models.Upload.objects.create(
+        mimetype=mimetype,
+        source=url,
+        third_party_provider=THIRD_PARTY_PROVIDER,
+        creation_date=timezone.now(),
+        track=track,
+        # archive.org gives the duration as "length", "mtime" is the file
+        # modification timestamp
+        duration=_parse_duration(files_data.get("length")),
+        size=_to_int(files_data.get("size")),
+        bitrate=_to_int(files_data.get("bitrate")),
+        library=_get_service_library(),
+        from_activity=None,
+        import_status="pending",
+    )
+
+
+@celery.app.task(name="archivedl.archive_download")
+@celery.require_instance(models.Track.objects.select_related(), "track")
+def archive_download(track, conf):
+    """
+    Search archive.org for ``track`` and download the first result found.
+
+    ``conf`` is the plugin configuration, it is not used yet.
+    """
+    if track.uploads.filter(third_party_provider=THIRD_PARTY_PROVIDER).exists():
+        # the hook can be triggered several times for the same track
+        logger.info(f"Track {track.pk} already has an archive-dl upload")
+        return
+    artist_name = utils.get_artist_credit_string(track)
+    query = f"mediatype:audio AND title:{track.title} AND creator:{artist_name}"
+    with requests.Session() as session:
+        url = get_search_url(query, page_size=1, page=1)
+        page_data = fetch_json(url, session)
+        for obj in page_data.get("response", {}).get("docs", []):
+            logger.info(f"launching download item for {str(obj)}")
+            download_item(
+                item_data=obj,
+                session=session,
+                allowed_extensions=utils.SUPPORTED_EXTENSIONS,
+                track=track,
+            )
+
+
+def fetch_json(url, session):
+    """Return the decoded JSON body of ``url``, raising on HTTP errors."""
+    logger.info(f"Fetching {url}...")
+    with session.get(url, timeout=REQUEST_TIMEOUT) as response:
+        response.raise_for_status()
+        return response.json()
+
+
+def download_item(
+    item_data,
+    session,
+    allowed_extensions,
+    track,
+):
+    """
+    Download the first supported audio file of an archive.org item.
+
+    The upload created for that file is deleted when the download fails, so
+    no upload is left without an audio file.
+    """
+    identifier = item_data["identifier"]
+    files_data = get_files_data(identifier, session)
+    to_download = list(
+        filter_files(
+            files_data.get("result", []),
+            allowed_extensions=allowed_extensions,
+        )
+    )
+    if not to_download:
+        logger.info(f"No supported audio file found in item {identifier}")
+        return
+    file_data = to_download[0]
+    # file names may contain spaces or a sub directory
+    quoted_name = urllib.parse.quote(file_data["name"])
+    url = f"https://archive.org/download/{identifier}/{quoted_name}"
+    upload = create_upload(url, track, file_data)
+    try:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            path = os.path.join(temp_dir, os.path.basename(file_data["name"]))
+            download_file(
+                path,
+                url=url,
+                session=session,
+                checksum=file_data.get("sha1"),
+                upload=upload,
+                to_download=file_data,
+            )
+
+        logger.info(f"Finished to download item {identifier}...")
+    except Exception:
+        upload.delete()
+        raise
+
+
+def check_integrity(path, expected_checksum):
+    """Return True when the SHA-1 of the file at ``path`` is the expected one."""
+    digest = hashlib.sha1()
+    with open(path, mode="rb") as f:
+        # hash by chunks so big audio files are never fully loaded in memory
+        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
+            digest.update(chunk)
+
+    return expected_checksum == digest.hexdigest()
+
+
+def get_files_data(identifier, session):
+    """Return the files metadata of the archive.org item ``identifier``."""
+    url = f"https://archive.org/metadata/{identifier}/files"
+    logger.info(f"Fetching files data at {url}...")
+    with session.get(url, timeout=REQUEST_TIMEOUT) as response:
+        response.raise_for_status()
+        return response.json()
+
+
+def download_file(path, url, session, checksum, upload, to_download):
+    """
+    Download ``url`` to ``path`` and attach the file to ``upload``.
+
+    ``to_download`` is the archive.org metadata of the file. Network errors
+    and checksum mismatches are raised, so the caller can discard the upload.
+    """
+    if os.path.exists(path) and check_integrity(path, checksum):
+        logger.info(f"Skipping already downloaded file at {path}")
+        return
+    logger.info(f"Downloading file {url}...")
+    try:
+        with session.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
+            response.raise_for_status()
+            with open(path, mode="wb") as f:
+                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+                    f.write(chunk)
+    except requests.exceptions.RequestException as e:
+        # requests is synchronous: it never raises asyncio.TimeoutError
+        logger.error(f"Error while downloading {url}: {e}")
+        raise
+    if checksum and not check_integrity(path, checksum):
+        raise ValueError(f"Checksum mismatch for {url}")
+
+    with open(path, "rb") as f:
+        upload.audio_file.save(os.path.basename(to_download["name"]), File(f))
+    upload.import_status = "finished"
+    upload.url = url
+    upload.save()
+    return upload
+
+
+def filter_files(files, allowed_extensions):
+    """Yield the files whose extension is allowed (every file if none is set)."""
+    for f in files:
+        if allowed_extensions:
+            extension = os.path.splitext(f["name"])[-1][1:].lower()
+            if extension not in allowed_extensions:
+                continue
+        yield f
+
+
+def get_search_url(query, page_size, page):
+    """Return the archive.org advanced search URL for ``query``."""
+    q = urllib.parse.urlencode({"q": query})
+    return f"https://archive.org/advancedsearch.php?{q}&sort[]=addeddate+desc&rows={page_size}&page={page}&output=json"
diff --git a/api/funkwhale_api/music/migrations/0062_upload_third_party_provider.py b/api/funkwhale_api/music/migrations/0062_upload_third_party_provider.py
new file mode 100644
index 000000000..40499048d
--- /dev/null
+++ b/api/funkwhale_api/music/migrations/0062_upload_third_party_provider.py
@@ -0,0 +1,42 @@
+# Generated by Django 4.2.9 on 2024-12-21 20:02
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("music", "0061_migrate_libraries_to_playlist"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="upload",
+            name="third_party_provider",
+            field=models.CharField(blank=True, max_length=100, null=True),
+        ),
+        migrations.AlterField(
+            model_name="uploadversion",
+            name="mimetype",
+            field=models.CharField(
+                choices=[
+                    ("audio/mp3", "mp3"),
+                    ("audio/mpeg3", "mp3"),
+                    ("audio/x-mp3", "mp3"),
+                    ("audio/mpeg", "mp3"),
+                    ("video/ogg", "ogg"),
+                    ("audio/ogg", "ogg"),
+                    ("audio/opus", "opus"),
+                    ("audio/x-m4a", "aac"),
+                    ("audio/x-m4a", "m4a"),
+                    ("audio/m4a", "m4a"),
+                    ("audio/x-flac", "flac"),
+                    ("audio/flac", "flac"),
+                    ("audio/aiff", "aif"),
+                    ("audio/x-aiff", "aif"),
+                    ("audio/aiff", "aiff"),
+                    ("audio/x-aiff", "aiff"),
+                ],
+                max_length=50,
+            ),
+        ),
+    ]
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index 3587b92a4..0fab722ab 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -24,6 +24,7 @@ from django.dispatch import receiver
 from django.urls import reverse
 from django.utils import timezone
 
+from config import plugins
 from funkwhale_api import musicbrainz
 from funkwhale_api.common import fields
 from funkwhale_api.common import models as common_models
@@ -522,10 +523,19 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
 
     def
with_playable_uploads(self, actor):
         uploads = Upload.objects.playable_by(actor)
-        return self.prefetch_related(
+        queryset = self.prefetch_related(
             models.Prefetch("uploads", queryset=uploads, to_attr="playable_uploads")
         )
 
+        # Ask third-party plugins for a file when the first track has no
+        # upload at all. Only that track is inspected, with two cheap
+        # queries, so the prefetch queryset returned to the caller stays
+        # lazy and the hook never receives ``None`` on an empty queryset.
+        track = self.first()
+        if track is not None and not track.uploads.exists():
+            plugins.trigger_hook(plugins.TRIGGER_THIRD_PARTY_UPLOAD, track=track)
+        return queryset
+
     def order_for_album(self):
         """
         Order by disc number then position
@@ -771,6 +781,8 @@ def get_file_path(instance, filename):
 
     if instance.library.actor.get_user():
         return common_utils.ChunkedPath("tracks")(instance, filename)
+    elif instance.third_party_provider:
+        return common_utils.ChunkedPath("third_party_tracks")(instance, filename)
     else:
         # we cache remote tracks in a different directory
         return common_utils.ChunkedPath("federation_cache/tracks")(instance, filename)
@@ -842,6 +854,9 @@ class Upload(models.Model):
     checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True)
 
     quality = models.IntegerField(choices=quality_choices, default=1)
+
+    third_party_provider = models.CharField(max_length=100, null=True, blank=True)
+
     objects = UploadQuerySet.as_manager()
 
     @property
diff --git a/api/funkwhale_api/music/utils.py b/api/funkwhale_api/music/utils.py
index e4b18197b..99d6bf85c 100644
--- a/api/funkwhale_api/music/utils.py
+++ b/api/funkwhale_api/music/utils.py
@@ -56,6 +56,7 @@ AUDIO_EXTENSIONS_AND_MIMETYPE = [
     ("opus", "audio/opus"),
     ("aac", "audio/x-m4a"),
     ("m4a", "audio/x-m4a"),
+    ("m4a", "audio/m4a"),
     ("flac", "audio/x-flac"),
     ("flac", "audio/flac"),
     ("aif", "audio/aiff"),
diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py
index b26364ecf..0e9420f9c 100644
--- a/api/funkwhale_api/music/views.py
+++ b/api/funkwhale_api/music/views.py
@@ -8,7 +8,7 @@ import requests.exceptions
 from django.conf import settings
 from django.core.cache import cache
 from django.db import transaction
-from django.db.models import
Count, F, Prefetch, Q, Sum
+from django.db.models import Case, Count, F, IntegerField, Prefetch, Q, Sum, Value, When
 from django.db.models.functions import Collate
 from django.utils import timezone
 from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
@@ -665,7 +665,15 @@ def handle_stream(track, request, download, explicit_file, format, max_bitrate):
     if explicit_file:
         queryset = queryset.filter(uuid=explicit_file)
     queryset = queryset.playable_by(actor)
-    queryset = queryset.order_by(F("audio_file").desc(nulls_last=True))
+    # manual uploads always come first, third party uploads are only a fallback
+    queryset = queryset.order_by(
+        Case(
+            When(third_party_provider__isnull=False, then=Value(1)),
+            default=Value(0),
+            output_field=IntegerField(),
+        ),
+        F("audio_file").desc(nulls_last=True),
+    )
     upload = queryset.first()
     if not upload:
         return Response(status=404)
diff --git a/compose/app.django.yml b/compose/app.django.yml
index 21961de7a..2b2b80f0e 100644
--- a/compose/app.django.yml
+++ b/compose/app.django.yml
@@ -2,6 +2,7 @@ x-django: &django
   image: funkwhale-api
   volumes:
     - ../api:/app
+    - ../.env:/app/.env
     - "${MUSIC_DIRECTORY_SERVE_PATH:-../.state/music}:/music:ro"
     - "../.state/plugins:/srv/funkwhale/plugins"
     - "../.state/staticfiles:/staticfiles"
@@ -21,6 +22,15 @@ services:
     build:
       context: ../api
       dockerfile: Dockerfile.debian
+    healthcheck: # NOTE(review): "|| exit 0" makes this test always succeed - confirm
+      test:
+        [
+          "CMD-SHELL",
+          'docker compose logs api | grep -q "Uvicorn running on" || exit 0',
+        ]
+      interval: 3s
+      timeout: 5s
+      retries: 3
     command: >
       sh -c "
       funkwhale-manage collectstatic --no-input &&
@@ -34,3 +44,6 @@ services:
       pip install watchdog[watchmedo] &&
       watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY}
       '
+    depends_on:
+      api:
+        condition: service_healthy
diff --git a/docs/specs/third-party-tracks/index.md b/docs/specs/third-party-tracks/index.md
new file
mode 100644
index 000000000..36fb7260e
--- /dev/null
+++ b/docs/specs/third-party-tracks/index.md
@@ -0,0 +1,34 @@
+# Third party tracks download
+
+## Issue
+
+As a user I cannot listen to tracks that are not on the Funkwhale network.
+
+## Proposed solution
+
+As an admin I can add plugins that support downloading tracks from third party services.
+
+## Backend
+
+When a track queryset is called with `with_playable_uploads`, if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
+
+`handle_stream` should order the upload queryset so that manual uploads are served before plugin uploads.
+
+## Plugin
+
+Plugins registering `TRIGGER_THIRD_PARTY_UPLOAD` should:
+
+- trigger a celery task. If not, the queryset will take a long time to complete.
+- create an upload with an associated file
+- delete the upload if no file is successfully downloaded
+
+An example can be found in `funkwhale_api.contrib.archivedl`
+
+## Follow up
+
+- The frontend should update the track object once `TRIGGER_THIRD_PARTY_UPLOAD` has produced an upload, see:
+  `channels.group_send("instance_activity", {"type": "event.send", "text": "", "data": data})`
+  `InstanceActivityConsumer` `/front/src/init/webSocket.ts`
+
+- trigger a channels group send so the frontend can update track qs when/if the upload is ready
+- Third party track stream (do not download the file, only pass a stream)