diff --git a/api/funkwhale_api/federation/api_views.py b/api/funkwhale_api/federation/api_views.py index a076f5a1d..5498c67d5 100644 --- a/api/funkwhale_api/federation/api_views.py +++ b/api/funkwhale_api/federation/api_views.py @@ -1,3 +1,5 @@ +from urllib.parse import urlparse + import requests.exceptions from django.conf import settings from django.db import transaction @@ -252,11 +254,18 @@ class FetchViewSet( if fetch.status == "finished": # a duplicate was returned, no need to fetch again return - if settings.FEDERATION_SYNCHRONOUS_FETCH: - tasks.fetch(fetch_id=fetch.pk) + + parsed_url = urlparse(fetch.url) + domain = parsed_url.netloc + if domain in fetch.supported_services: + tasks.third_party_fetch(fetch_id=fetch.pk) fetch.refresh_from_db() else: - common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk) + if settings.FEDERATION_SYNCHRONOUS_FETCH: + tasks.fetch(fetch_id=fetch.pk) + fetch.refresh_from_db() + else: + common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk) class DomainViewSet( diff --git a/api/funkwhale_api/federation/contexts.py b/api/funkwhale_api/federation/contexts.py index 5ea7ae4d4..6efa06ed9 100644 --- a/api/funkwhale_api/federation/contexts.py +++ b/api/funkwhale_api/federation/contexts.py @@ -359,6 +359,22 @@ CONTEXTS = [ } }, }, + { + "shortId": "MB", + "contextUrl": None, + "documentUrl": "http://musicbrainz.org/ns/mmd-1.0#", + "document": { + "@context": { + "mb": "http://musicbrainz.org/ns/mmd-1.0#", + "schema": "http://schema.org#", + "Recording": "schema:MusicRecording", + "name": "schema:name", + "duration": "schema:duration", + "@id": "@id", + "@type": "@type", + }, + }, + }, ] CONTEXTS_BY_ID = {c["shortId"]: c for c in CONTEXTS} @@ -392,3 +408,4 @@ SEC = NS(CONTEXTS_BY_ID["SEC"]) FW = NS(CONTEXTS_BY_ID["FW"]) SC = NS(CONTEXTS_BY_ID["SC"]) LITEPUB = NS(CONTEXTS_BY_ID["LITEPUB"]) +MB = NS(CONTEXTS_BY_ID["MB"]) diff --git a/api/funkwhale_api/federation/models.py b/api/funkwhale_api/federation/models.py index 
b01355493..982619c9e 100644
--- a/api/funkwhale_api/federation/models.py
+++ b/api/funkwhale_api/federation/models.py
@@ -18,6 +18,7 @@ from funkwhale_api.common import session
 from funkwhale_api.common import utils as common_utils
 from funkwhale_api.common import validators as common_validators
 from funkwhale_api.music import utils as music_utils
+from funkwhale_api.musicbrainz import serializers as musicbrainz_serializers
 
 from . import utils as federation_utils
 
@@ -411,8 +412,14 @@ class Fetch(models.Model):
             contexts.AS.Organization: [serializers.ActorSerializer],
             contexts.AS.Service: [serializers.ActorSerializer],
             contexts.AS.Application: [serializers.ActorSerializer],
+            # for mb the key must be the api namespace
+            "recordings": [musicbrainz_serializers.RecordingSerializer],
         }
+
+    @property
+    def supported_services(self):
+        return ["musicbrainz.org"]
 
 
 class InboxItem(models.Model):
     """
diff --git a/api/funkwhale_api/federation/tasks.py b/api/funkwhale_api/federation/tasks.py
index df08b5f38..af617e9ef 100644
--- a/api/funkwhale_api/federation/tasks.py
+++ b/api/funkwhale_api/federation/tasks.py
@@ -2,6 +2,8 @@ import datetime
 import json
 import logging
 import os
+import uuid
+from urllib.parse import urlparse
 
 import requests
 from django.conf import settings
@@ -13,6 +15,7 @@ from django.utils import timezone
 from dynamic_preferences.registries import global_preferences_registry
 from requests.exceptions import RequestException
 
+from funkwhale_api import musicbrainz
 from funkwhale_api.audio import models as audio_models
 from funkwhale_api.common import models as common_models
 from funkwhale_api.common import preferences, session
@@ -456,6 +459,175 @@ def fetch(fetch_obj):
     )
 
 
+def musicbrainz_type_handler(fetch):
+    """
+    Extract the MusicBrainz API namespace and MBID from ``fetch.url``.
+
+    The URL path is expected to look like ``/<entity>/<mbid>``, for instance
+    ``https://musicbrainz.org/recording/<uuid>``. The entity is pluralized
+    (``recording`` -> ``recordings``) because the result is used both as an
+    attribute name on ``musicbrainz.api`` and as a key of ``Fetch.serializers``.
+
+    Returns a ``(type_, mbid)`` tuple. Raises ``ValueError`` when the path has
+    fewer than two segments or when the second one is not a valid UUID.
+    """
+    url = fetch.url
+    path_parts = urlparse(url).path.strip("/").split("/")
+    if len(path_parts) < 2:
+        # e.g. https://musicbrainz.org/recording: nothing to index below
+        raise ValueError(f"could not get type and mbid from url {url}")
+    type_ = path_parts[0] + "s"
+    mbid = path_parts[1]
+    try:
+        uuid.UUID(mbid)
+    except ValueError as e:
+        raise ValueError(f"could not get mbid from url {url}") from e
+    return type_, mbid
+
+
+def musicbrainz_metadata_handler(type_, id):
+    """
+    Query the ``type_`` namespace of the MusicBrainz API for ``id`` and look up
+    a local track carrying the same MBID.
+
+    Returns a ``(result, existing)`` tuple. ``result`` is the API answer with
+    hyphens replaced by underscores in every dict key (``artist-credit`` ->
+    ``artist_credit``). ``existing`` is the matching ``Track`` or ``None``.
+    """
+
+    def replace_hyphens_in_keys(obj):
+        if isinstance(obj, dict):
+            return {
+                k.replace("-", "_"): replace_hyphens_in_keys(v) for k, v in obj.items()
+            }
+        if isinstance(obj, list):
+            return [replace_hyphens_in_keys(item) for item in obj]
+        return obj
+
+    result = replace_hyphens_in_keys(
+        getattr(musicbrainz.api, type_).get(
+            id=id, includes=["tags", "artists", "releases"]
+        )
+    )
+
+    # NOTE(review): the lookup always targets Track, whatever type_ is; that is
+    # only correct while "recordings" is the sole supported namespace.
+    # first() already returns None when nothing matches, so a single query is
+    # enough (no exists() round trip).
+    existing = music_models.Track.objects.filter(mbid=id).first()
+    return result, existing
+
+
+# Per-service handlers, keyed by the netloc of the fetched URL
+type_and_id_from_third_party = {"musicbrainz.org": musicbrainz_type_handler}
+metadata_from_third_party_ = {"musicbrainz.org": musicbrainz_metadata_handler}
+
+
+@celery.app.task(name="third_party_fetch")
+@transaction.atomic
+@celery.require_instance(
+    models.Fetch.objects.filter(status="pending").select_related("actor"),
+    "fetch_obj",
+    "fetch_id",
+)
+def third_party_fetch(fetch_obj):
+    """
+    Resolve a pending Fetch whose URL belongs to one of
+    ``fetch_obj.supported_services``.
+
+    The outcome is saved on ``fetch_obj``: ``finished`` with the saved object
+    attached, ``skipped`` when no serializer is registered for the type, or
+    ``errored`` with an ``error_code`` stored in ``detail``.
+    """
+
+    def error(code, **kwargs):
+        fetch_obj.status = "errored"
+        fetch_obj.fetch_date = timezone.now()
+        fetch_obj.detail = {"error_code": code}
+        fetch_obj.detail.update(kwargs)
+        fetch_obj.save(update_fields=["fetch_date", "status", "detail"])
+
+    def check_url(url):
+        # returns the service name, or None once the fetch is flagged as errored
+        if not url.startswith("webfinger://"):
+            payload, _ = mrf.inbox.apply({"id": url})
+            if not payload:
+                error("blocked", message="Blocked by MRF")
+                return None
+
+        service = urlparse(url).netloc
+        if service not in fetch_obj.supported_services:
+            error("invalid_url", message=f"Unsupported domain {service}")
+            return None
+        return service
+
+    url = fetch_obj.url
+    actor = fetch_obj.actor
+    service = check_url(url)
+    if service is None:
+        # the error is already recorded, going further would raise KeyError
+        return
+
+    try:
+        type_, mbid = type_and_id_from_third_party[service](fetch_obj)
+    except ValueError as e:
+        # Python 3 exceptions have no .message attribute
+        return error("url_parse_error", message=str(e))
+    logger.debug("Parsed URL %s into type %s and id %s", url, type_, mbid)
+
+    try:
+        result, existing = metadata_from_third_party_[service](type_, mbid)
+    except requests.exceptions.HTTPError as e:
+        # a Response is falsy for 4xx/5xx answers, so compare against None
+        response = e.response
+        return error(
+            "http",
+            status_code=response.status_code if response is not None else None,
+            message=response.text if response is not None else str(e),
+        )
+    except requests.exceptions.Timeout:
+        return error("timeout")
+    except requests.exceptions.ConnectionError as e:
+        return error("connection", message=str(e))
+    except requests.RequestException as e:
+        return error("request", message=str(e))
+    except Exception as e:
+        return error("unhandled", message=str(e))
+    logger.debug(
+        "Remote answered with %s and we found %s in database", result, existing
+    )
+
+    # dict.get() never raises: an unknown type yields None
+    serializer_classes = fetch_obj.serializers.get(type_)
+    if not serializer_classes:
+        fetch_obj.status = "skipped"
+        fetch_obj.fetch_date = timezone.now()
+        fetch_obj.detail = {"reason": "unhandled_type", "type": type_}
+        return fetch_obj.save(update_fields=["fetch_date", "status", "detail"])
+
+    serializer = None
+    for serializer_class in serializer_classes:
+        serializer = serializer_class(
+            existing, data=result, context={"fetch_actor": actor}
+        )
+        if serializer.is_valid():
+            break
+    if serializer.errors:
+        return error("validation", validation_errors=serializer.errors)
+    try:
+        obj = serializer.save()
+    except Exception as e:
+        error("save", message=str(e))
+        raise
+
+    fetch_obj.object = obj
+    fetch_obj.status = "finished"
+    fetch_obj.fetch_date = timezone.now()
+    return fetch_obj.save(
+        update_fields=["fetch_date", "status", "object_id", "object_content_type"]
+    )
+
+
 class PreserveSomeDataCollector(Collector):
     """
     We need to delete everything related to an actor. Well… Almost everything.
diff --git a/api/funkwhale_api/musicbrainz/serializers.py b/api/funkwhale_api/musicbrainz/serializers.py
new file mode 100644
index 000000000..2c68ff6d3
--- /dev/null
+++ b/api/funkwhale_api/musicbrainz/serializers.py
@@ -0,0 +1,142 @@
+from rest_framework import serializers
+
+from funkwhale_api.tags import models as tags_models
+
+
+class ArtistSerializer(serializers.Serializer):
+    """
+    Serializer for Musicbrainz artist data.
+    """
+
+    id = serializers.CharField()
+    name = serializers.CharField()
+
+    def create(self, validated_data):
+        from funkwhale_api.music.models import Artist
+
+        # the same artist shows up in many credits: reuse the row matching
+        # this mbid instead of inserting a duplicate on every fetch
+        artist, _ = Artist.objects.get_or_create(
+            mbid=validated_data["id"],
+            defaults={"name": validated_data["name"]},
+        )
+        return artist
+
+
+class ArtistCreditSerializer(serializers.Serializer):
+    """
+    Serializer for Musicbrainz artist credit data.
+    """
+
+    name = serializers.CharField()
+    # join phrases such as " & " only make sense with their surrounding spaces,
+    # which CharField strips by default
+    joinphrase = serializers.CharField(allow_blank=True, trim_whitespace=False)
+    artist = ArtistSerializer()
+
+    def create(self, validated_data):
+        from funkwhale_api.music.models import ArtistCredit
+
+        data = {
+            "credit": validated_data["name"],
+            "joinphrase": validated_data.get("joinphrase", ""),
+            "artist": ArtistSerializer().create(validated_data["artist"]),
+        }
+        return ArtistCredit.objects.create(**data)
+
+
+class ReleaseSerializer(serializers.Serializer):
+    """
+    Serializer for Musicbrainz release data.
+    """
+
+    id = serializers.CharField()
+    title = serializers.CharField()
+    artist_credit = ArtistCreditSerializer(many=True)
+    # not every payload carries tags or a date; create() and update() already
+    # read both through .get(), so validation must not require them
+    tags = serializers.ListField(
+        child=serializers.CharField(), allow_empty=True, required=False
+    )
+    date = serializers.DateField(
+        input_formats=["%Y", "%Y-%m", "%Y/%m/%d", "%Y-%m-%d"], required=False
+    )
+
+    def create(self, validated_data):
+        from funkwhale_api.music.models import Album
+
+        # a release is shared by all of its recordings: update the album we
+        # already know rather than creating it once per fetched recording
+        known = Album.objects.filter(mbid=validated_data["id"]).first()
+        if known is not None:
+            return self.update(known, validated_data)
+
+        data = {
+            "title": validated_data["title"],
+            "mbid": validated_data["id"],
+            "release_date": validated_data.get("date"),
+        }
+        album = Album.objects.create(**data)
+        artist_credit = ArtistCreditSerializer(many=True).create(
+            validated_data["artist_credit"]
+        )
+        album.artist_credit.set(artist_credit)
+        album.save()
+        tags_models.add_tags(album, *validated_data.get("tags", []))
+
+        return album
+
+    def update(self, instance, validated_data):
+        instance.title = validated_data["title"]
+        instance.release_date = validated_data.get("date")
+        instance.save()
+        tags_models.add_tags(instance, *validated_data.get("tags", []))
+
+        return instance
+
+
+class RecordingSerializer(serializers.Serializer):
+    """
+    Serializer for Musicbrainz track data.
+    """
+
+    id = serializers.CharField()
+    title = serializers.CharField()
+    artist_credit = ArtistCreditSerializer(many=True)
+    releases = ReleaseSerializer(many=True)
+    tags = serializers.ListField(
+        child=serializers.CharField(), allow_empty=True, required=False
+    )
+
+    def create(self, validated_data):
+        from funkwhale_api.music.models import Track
+
+        # In mb a recording can have various releases, we take the first one.
+        # Only that one is turned into an album, so the others do not end up
+        # as albums without tracks.
+        # NOTE(review): assumes Track.album may be NULL when there is no
+        # release at all - confirm against the model.
+        releases = validated_data["releases"]
+        album = ReleaseSerializer().create(releases[0]) if releases else None
+        data = {
+            "title": validated_data["title"],
+            "mbid": validated_data["id"],
+            "album": album,
+        }
+        track = Track.objects.create(**data)
+        artist_credit = ArtistCreditSerializer(many=True).create(
+            validated_data["artist_credit"]
+        )
+        track.artist_credit.set(artist_credit)
+        track.save()
+
+        tags_models.add_tags(track, *validated_data.get("tags", []))
+
+        return track
+
+    def update(self, instance, validated_data):
+        instance.title = validated_data["title"]
+        instance.save()
+        tags_models.add_tags(instance, *validated_data.get("tags", []))
+
+        return instance
diff --git a/api/tests/federation/test_tasks.py b/api/tests/federation/test_tasks.py
index 7f9fd6a93..0e21a0a3e 100644
--- a/api/tests/federation/test_tasks.py
+++ b/api/tests/federation/test_tasks.py
@@ -732,3 +732,100 @@ def test_fetch_webfinger_create_actor(factories, r_mock, mocker):
     assert init.call_args[0][1] == actor
     assert init.call_args[1]["data"] == payload
     assert save.call_count == 1
+
+
+def test_third_party_fetch_success(factories, mocker):
+    from funkwhale_api.musicbrainz import serializers as musicbrainz_serializers
+
+    track = factories["music.Track"]()
+    url = f"https://musicbrainz.org/recording/{track.mbid}"
+    fetch = factories["federation.Fetch"](url=url)
+    payload = {
+        "releases": [
+            {
+                "status": "Promotion",
+                "id": "220ffb88-49ed-4df4-a330-46f8e7353ff0",
+                "country": "DE",
+                "title": "With Oi! To Hope for Myanmar 2022",
+                "quality": "normal",
+                "release-events": [
+                    {
+                        "area": {
+                            "name": "Germany",
+                            "id": "85752fda-13c4-31a3-bee5-0e5cb1f51dad",
+                            "sort-name": "Germany",
+                            "disambiguation": "",
+                            "iso-3166-1-codes": ["DE"],
+                        },
+                        "date": "2022",
+                    }
+                ],
+                "disambiguation": "Version aus 2022",
+                "status-id": "518ffc83-5cde-34df-8627-81bff5093d92",
+                "packaging-id": "8f931351-d2e2-310f-afc6-37b89ddba246",
+                "artist-credit": [
+                    {
+                        "artist": {
+                            "sort-name": "Various Artists",
+                            "name": "Various Artists",
+                            "disambiguation": "add compilations to this artist",
+                            "type": "Other",
+                            "type-id": "ac897045-5043-3294-969b-187360e45d86",
+                            "id": "89ad4ac3-39f7-470e-963a-56509c546377",
+                        },
+                        "joinphrase": "",
+                        "name": "Various Artists",
+                    }
+                ],
+                "barcode": "",
+                "date": "2022",
+                "packaging": "Digipak",
+                "text-representation": {"language": "mul", "script": "Latn"},
+            }
+        ],
+        "disambiguation": "",
+        "video": False,
+        "first-release-date": "2022",
+        "artist-credit": [
+            {
+                "artist": {
+                    "name": "The Rebel Riot",
+                    "disambiguation": "",
+                    "sort-name": "Rebel Riot, The",
+                    "type": "Group",
+                    "id": "1ff2cd0c-2ac1-4296-b650-77ef57bb0d01",
+                    "country": "MM",
+                    "type-id": "e431f5f6-b5d2-343d-8b36-72607fffb74b",
+                },
+                "name": "The Rebel Riot",
+                "joinphrase": "",
+            }
+        ],
+        "title": "A.C.A.B.",
+        "id": "455cd030-7394-4244-9a53-3b96a666b1c6",
+        "length": 193253,
+    }
+    # NOTE(review): the task talks to Musicbrainz through musicbrainz.api, not
+    # through the fetched URL, so the client is patched instead of the HTTP
+    # layer. Assumes recordings.get is reachable at this dotted path and returns
+    # the recording document itself - confirm against the client.
+    get = mocker.patch(
+        "funkwhale_api.musicbrainz.api.recordings.get", return_value=payload
+    )
+    # spy on the serializer registered under "recordings" in Fetch.serializers
+    init = mocker.spy(musicbrainz_serializers.RecordingSerializer, "__init__")
+    save = mocker.spy(musicbrainz_serializers.RecordingSerializer, "save")
+
+    tasks.third_party_fetch(fetch_id=fetch.pk)
+
+    fetch.refresh_from_db()
+    track.refresh_from_db()
+
+    assert fetch.status == "finished"
+    assert fetch.object == track
+    assert track.title == payload["title"]
+    assert get.call_count == 1
+    assert init.call_count == 1
+    # the existing track is passed as the instance to update
+    assert init.call_args[0][1] == track
+    assert save.call_count == 1