Allow plugin to download third party tracks

This commit is contained in:
petitminion 2025-01-12 17:30:16 +00:00
parent 6f2c001bc2
commit b6d27a58d3
11 changed files with 292 additions and 3 deletions

View File

@ -299,6 +299,10 @@ def background_task(name):
# HOOKS
TRIGGER_THIRD_PARTY_UPLOAD = "third_party_upload"
"""
Called when a track is being listened
"""
LISTENING_CREATED = "listening_created"
"""
Called when a track is being listened

View File

@ -114,6 +114,7 @@ else:
logger.info("Loaded env file at %s/.env", path)
break
FUNKWHALE_PLUGINS = env("FUNKWHALE_PLUGINS", default="")
FUNKWHALE_PLUGINS_PATH = env(
"FUNKWHALE_PLUGINS_PATH", default="/srv/funkwhale/plugins/"
)

View File

@ -0,0 +1,13 @@
import logging
from config import plugins
from funkwhale_api.contrib.archivedl import tasks
from .funkwhale_startup import PLUGIN
logger = logging.getLogger(__name__)
@plugins.register_hook(plugins.TRIGGER_THIRD_PARTY_UPLOAD, PLUGIN)
def lauch_download(track, conf={}):
tasks.archive_download.delay(track_id=track.pk, conf=conf)

View File

@ -0,0 +1,10 @@
from config import plugins
PLUGIN = plugins.get_plugin_config(
name="archivedl",
label="Archive-dl",
description="",
version="0.1",
user=False,
conf=[],
)

View File

@ -0,0 +1,148 @@
import asyncio
import hashlib
import logging
import os
import tempfile
import urllib.parse
import requests
from django.core.files import File
from django.utils import timezone
from funkwhale_api.federation import actors
from funkwhale_api.music import models, utils
from funkwhale_api.taskapp import celery
logger = logging.getLogger(__name__)
def create_upload(url, track, files_data):
mimetype = f"audio/{files_data.get('format', 'unknown')}"
duration = files_data.get("mtime", 0)
filesize = files_data.get("size", 0)
bitrate = files_data.get("bitrate", 0)
service_library = models.Library.objects.create(
privacy_level="everyone",
actor=actors.get_service_actor(),
)
return models.Upload.objects.create(
mimetype=mimetype,
source=url,
third_party_provider="archive-dl",
creation_date=timezone.now(),
track=track,
duration=duration,
size=filesize,
bitrate=bitrate,
library=service_library,
from_activity=None,
import_status="finished",
)
@celery.app.task(name="archivedl.archive_download")
@celery.require_instance(models.Track.objects.select_related(), "track")
def archive_download(track, conf):
artist_name = utils.get_artist_credit_string(track)
query = f"mediatype:audio AND title:{track.title} AND creator:{artist_name}"
with requests.Session() as session:
url = get_search_url(query, page_size=1, page=1)
page_data = fetch_json(url, session)
for obj in page_data["response"]["docs"]:
logger.info(f"launching download item for {str(obj)}")
download_item(
item_data=obj,
session=session,
allowed_extensions=utils.SUPPORTED_EXTENSIONS,
track=track,
)
def fetch_json(url, session):
logger.info(f"Fetching {url}...")
with session.get(url) as response:
return response.json()
def download_item(
item_data,
session,
allowed_extensions,
track,
):
files_data = get_files_data(item_data["identifier"], session)
to_download = list(
filter_files(
files_data["result"],
allowed_extensions=allowed_extensions,
)
)
url = f"https://archive.org/download/{item_data['identifier']}/{to_download[0]['name']}"
upload = create_upload(url, track, to_download[0])
try:
with tempfile.TemporaryDirectory() as temp_dir:
path = os.path.join(temp_dir, to_download[0]["name"])
download_file(
path,
url=url,
session=session,
checksum=to_download[0]["sha1"],
upload=upload,
to_download=to_download,
)
logger.info(f"Finished to download item {item_data['identifier']}...")
except Exception as e:
upload.delete()
raise e
def check_integrity(path, expected_checksum):
with open(path, mode="rb") as f:
hash = hashlib.sha1()
hash.update(f.read())
return expected_checksum == hash.hexdigest()
def get_files_data(identifier, session):
url = f"https://archive.org/metadata/{identifier}/files"
logger.info(f"Fetching files data at {url}...")
with session.get(url) as response:
return response.json()
def download_file(path, url, session, checksum, upload, to_download):
if os.path.exists(path) and check_integrity(path, checksum):
logger.info(f"Skipping already downloaded file at {path}")
return
logger.info(f"Downloading file {url}...")
with open(path, mode="wb") as f:
try:
with session.get(url) as response:
f.write(response.content)
except asyncio.TimeoutError as e:
logger.error(f"Timeout error while downloading {url}: {e}")
with open(path, "rb") as f:
upload.audio_file.save(f"{to_download['name']}", File(f))
upload.import_status = "finished"
upload.url = url
upload.save()
return upload
def filter_files(files, allowed_extensions):
for f in files:
if allowed_extensions:
extension = os.path.splitext(f["name"])[-1][1:]
if extension not in allowed_extensions:
continue
yield f
def get_search_url(query, page_size, page):
q = urllib.parse.urlencode({"q": query})
return f"https://archive.org/advancedsearch.php?{q}&sort[]=addeddate+desc&rows={page_size}&page={page}&output=json"

View File

@ -0,0 +1,42 @@
# Generated by Django 4.2.9 on 2024-12-21 20:02
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("music", "0061_migrate_libraries_to_playlist"),
]
operations = [
migrations.AddField(
model_name="upload",
name="third_party_provider",
field=models.CharField(blank=True, max_length=100, null=True),
),
migrations.AlterField(
model_name="uploadversion",
name="mimetype",
field=models.CharField(
choices=[
("audio/mp3", "mp3"),
("audio/mpeg3", "mp3"),
("audio/x-mp3", "mp3"),
("audio/mpeg", "mp3"),
("video/ogg", "ogg"),
("audio/ogg", "ogg"),
("audio/opus", "opus"),
("audio/x-m4a", "aac"),
("audio/x-m4a", "m4a"),
("audio/m4a", "m4a"),
("audio/x-flac", "flac"),
("audio/flac", "flac"),
("audio/aiff", "aif"),
("audio/x-aiff", "aif"),
("audio/aiff", "aiff"),
("audio/x-aiff", "aiff"),
],
max_length=50,
),
),
]

View File

@ -24,6 +24,7 @@ from django.dispatch import receiver
from django.urls import reverse
from django.utils import timezone
from config import plugins
from funkwhale_api import musicbrainz
from funkwhale_api.common import fields
from funkwhale_api.common import models as common_models
@ -522,10 +523,19 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
def with_playable_uploads(self, actor):
uploads = Upload.objects.playable_by(actor)
return self.prefetch_related(
queryset = self.prefetch_related(
models.Prefetch("uploads", queryset=uploads, to_attr="playable_uploads")
)
if queryset and queryset[0].uploads.count() > 0:
return queryset
else:
plugins.trigger_hook(
plugins.TRIGGER_THIRD_PARTY_UPLOAD,
track=self.first(),
)
return queryset
def order_for_album(self):
"""
Order by disc number then position
@ -771,6 +781,8 @@ def get_file_path(instance, filename):
if instance.library.actor.get_user():
return common_utils.ChunkedPath("tracks")(instance, filename)
elif instance.third_party_provider:
return common_utils.ChunkedPath("third_party_tracks")(instance, filename)
else:
# we cache remote tracks in a different directory
return common_utils.ChunkedPath("federation_cache/tracks")(instance, filename)
@ -842,6 +854,9 @@ class Upload(models.Model):
checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True)
quality = models.IntegerField(choices=quality_choices, default=1)
third_party_provider = models.CharField(max_length=100, null=True, blank=True)
objects = UploadQuerySet.as_manager()
@property

View File

@ -56,6 +56,7 @@ AUDIO_EXTENSIONS_AND_MIMETYPE = [
("opus", "audio/opus"),
("aac", "audio/x-m4a"),
("m4a", "audio/x-m4a"),
("m4a", "audio/m4a"),
("flac", "audio/x-flac"),
("flac", "audio/flac"),
("aif", "audio/aiff"),

View File

@ -8,7 +8,7 @@ import requests.exceptions
from django.conf import settings
from django.core.cache import cache
from django.db import transaction
from django.db.models import Count, F, Prefetch, Q, Sum
from django.db.models import BooleanField, Case, Count, F, Prefetch, Q, Sum, Value, When
from django.db.models.functions import Collate
from django.utils import timezone
from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
@ -665,7 +665,15 @@ def handle_stream(track, request, download, explicit_file, format, max_bitrate):
if explicit_file:
queryset = queryset.filter(uuid=explicit_file)
queryset = queryset.playable_by(actor)
queryset = queryset.order_by(F("audio_file").desc(nulls_last=True))
# third_party uploads are displayed before manual upload only if no audio file is found in manual upload
queryset = queryset.order_by(
Case(
When(third_party_provider__isnull=False, then=Value(1)),
default=Value(0),
output_field=BooleanField(),
),
F("audio_file").desc(nulls_last=True),
)
upload = queryset.first()
if not upload:
return Response(status=404)

View File

@ -2,6 +2,7 @@ x-django: &django
image: funkwhale-api
volumes:
- ../api:/app
- ../.env:/app/.env
- "${MUSIC_DIRECTORY_SERVE_PATH:-../.state/music}:/music:ro"
- "../.state/plugins:/srv/funkwhale/plugins"
- "../.state/staticfiles:/staticfiles"
@ -21,6 +22,15 @@ services:
build:
context: ../api
dockerfile: Dockerfile.debian
healthcheck:
test:
[
"CMD-SHELL",
'docker compose logs api | grep -q "Uvicorn running on" || exit 0',
]
interval: 3s
timeout: 5s
retries: 3
command: >
sh -c "
funkwhale-manage collectstatic --no-input &&
@ -34,3 +44,6 @@ services:
pip install watchdog[watchmedo] &&
watchmedo auto-restart --patterns="*.py" --recursive -- celery -A funkwhale_api.taskapp worker -l debug -B --concurrency=${CELERYD_CONCURRENCY}
'
depends_on:
api:
condition: service_healthy

View File

@ -0,0 +1,34 @@
# Third party tracks download
## Issue
Has a user I cannot listen to tracks that are not on the funkwhale Network.
## Proposed solution
Has an admin I can add plugins that support downloading tracks from third party services
## Backend
When a track queryset is called with `with_playable_uploads` if no upload is found we trigger `plugins.TRIGGER_THIRD_PARTY_UPLOAD`.
`handle_stream` should filter the upload queryset to display manual upload before plugin upload
## Plugin
Plugins registering `TRIGGER_THIRD_PARTY_UPLOAD` should :
- trigger celery task. If not the queryset will take a long time to complete.
- create an upload with an associated file
- delete the upload if no file is succefully downloaded
An example can be found in `funkwhale_api.contrib.archivedl`
## Follow up
-The frontend should update the track object if `TRIGGER_THIRD_PARTY_UPLOAD`
`channels.group_send("instance_activity", {"type": "event.send", "text": "", "data": data})`
`InstanceActivityConsumer` `/front/src/init/webSocket.ts`
- trigger a channels group send so the frontend can update track qs when/if the upload is ready
- Third party track stream (do not download the file, only pass a stream)