WIP:newfeature(backend):fetch musicbrainz metadata from search bar

This commit is contained in:
Petitminion 2025-06-03 03:40:40 +02:00
parent 87e7297fae
commit e1445c5637
6 changed files with 377 additions and 3 deletions

View File

@ -1,3 +1,5 @@
from urllib.parse import urlparse
import requests.exceptions
from django.conf import settings
from django.db import transaction
@ -252,11 +254,18 @@ class FetchViewSet(
if fetch.status == "finished":
# a duplicate was returned, no need to fetch again
return
if settings.FEDERATION_SYNCHRONOUS_FETCH:
tasks.fetch(fetch_id=fetch.pk)
parsed_url = urlparse(fetch.url)
domain = parsed_url.netloc
if domain in fetch.supported_services:
tasks.third_party_fetch(fetch_id=fetch.pk)
fetch.refresh_from_db()
else:
common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk)
if settings.FEDERATION_SYNCHRONOUS_FETCH:
tasks.fetch(fetch_id=fetch.pk)
fetch.refresh_from_db()
else:
common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk)
class DomainViewSet(

View File

@ -359,6 +359,22 @@ CONTEXTS = [
}
},
},
{
"shortId": "MB",
"contextUrl": None,
"documentUrl": "http://musicbrainz.org/ns/mmd-1.0#",
"document": {
"@context": {
"mb": "http://musicbrainz.org/ns/mmd-1.0#",
"schema": "http://schema.org#",
"Recording": "schema:MusicRecording",
"name": "schema:name",
"duration": "schema:duration",
"@id": "@id",
"@type": "@type",
},
},
},
]
CONTEXTS_BY_ID = {c["shortId"]: c for c in CONTEXTS}
@ -392,3 +408,4 @@ SEC = NS(CONTEXTS_BY_ID["SEC"])
FW = NS(CONTEXTS_BY_ID["FW"])
SC = NS(CONTEXTS_BY_ID["SC"])
LITEPUB = NS(CONTEXTS_BY_ID["LITEPUB"])
MB = NS(CONTEXTS_BY_ID["MB"])

View File

@ -18,6 +18,7 @@ from funkwhale_api.common import session
from funkwhale_api.common import utils as common_utils
from funkwhale_api.common import validators as common_validators
from funkwhale_api.music import utils as music_utils
from funkwhale_api.musicbrainz import serializers as musicbrainz_serializers
from . import utils as federation_utils
@ -411,8 +412,14 @@ class Fetch(models.Model):
contexts.AS.Organization: [serializers.ActorSerializer],
contexts.AS.Service: [serializers.ActorSerializer],
contexts.AS.Application: [serializers.ActorSerializer],
# for mb the key must be the api namespace
"recordings": [musicbrainz_serializers.RecordingSerializer],
}
@property
def supported_services(self):
    """Domains of the third-party services this fetch may be resolved against.

    Returns:
        A freshly built list of network locations (currently only
        ``musicbrainz.org``).
    """
    services = ["musicbrainz.org"]
    return services
class InboxItem(models.Model):
"""

View File

@ -2,6 +2,8 @@ import datetime
import json
import logging
import os
import uuid
from urllib.parse import urlparse
import requests
from django.conf import settings
@ -13,6 +15,7 @@ from django.utils import timezone
from dynamic_preferences.registries import global_preferences_registry
from requests.exceptions import RequestException
from funkwhale_api import musicbrainz
from funkwhale_api.audio import models as audio_models
from funkwhale_api.common import models as common_models
from funkwhale_api.common import preferences, session
@ -456,6 +459,137 @@ def fetch(fetch_obj):
)
def musicbrainz_type_handler(fetch):
    """Extract the entity type and MBID from a MusicBrainz URL.

    The first path segment is pluralised by appending ``"s"`` (for example
    ``recording`` becomes ``recordings``) and the second segment must parse
    as a UUID.

    Args:
        fetch: object exposing the URL to parse as its ``url`` attribute.

    Returns:
        A ``(type_, mbid)`` tuple of strings.

    Raises:
        ValueError: if the URL path has fewer than two segments or the
            second segment is not a valid UUID.
    """
    url = fetch.url
    path_parts = urlparse(url).path.strip("/").split("/")
    # Guard against short paths so callers only ever have to handle
    # ValueError (indexing used to leak an IndexError here).
    if len(path_parts) < 2 or not path_parts[0]:
        raise ValueError(f"could not get type and mbid from url {url}")
    entity, mbid = path_parts[0], path_parts[1]
    try:
        uuid.UUID(mbid)
    except ValueError as err:
        raise ValueError(f"could not get mbid from url {url}") from err
    return entity + "s", mbid
def musicbrainz_metadata_handler(type_, id):
    """Fetch MusicBrainz metadata and look up a matching local track.

    Args:
        type_: name of the attribute of ``musicbrainz.api`` to query
            (for example ``"recordings"``).
        id: MBID passed to the API lookup and used to find a local track.
            The name shadows the builtin but is kept for backward
            compatibility with existing callers.

    Returns:
        A ``(result, existing)`` tuple: ``result`` is the API payload with
        every ``-`` in dictionary keys replaced by ``_``; ``existing`` is
        the first ``Track`` whose ``mbid`` equals ``id``, or ``None``.
    """

    def replace_hyphens_in_keys(obj):
        # Recursively rewrite keys such as "artist-credit" to
        # "artist_credit" so they can be used as serializer field names.
        if isinstance(obj, dict):
            return {
                k.replace("-", "_"): replace_hyphens_in_keys(v) for k, v in obj.items()
            }
        if isinstance(obj, list):
            return [replace_hyphens_in_keys(item) for item in obj]
        return obj

    result = replace_hyphens_in_keys(
        getattr(musicbrainz.api, type_).get(
            id=id, includes=["tags", "artists", "releases"]
        )
    )
    # ``first()`` already yields None when nothing matches, so a preliminary
    # ``exists()`` call would only cost an extra query.
    # NOTE(review): the lookup always targets Track, whatever ``type_`` is.
    existing = music_models.Track.objects.filter(mbid=id).first()
    return result, existing
# Dispatch tables keyed by the network location (domain) of the fetched URL.
# Handlers taking a Fetch and returning a ``(type, id)`` pair for the service.
type_and_id_from_third_party = {"musicbrainz.org": musicbrainz_type_handler}
# Handlers taking ``(type, id)`` and returning ``(remote_payload, existing_object)``.
metadata_from_third_party_ = {"musicbrainz.org": musicbrainz_metadata_handler}
@celery.app.task(name="third_party_fetch")
@transaction.atomic
@celery.require_instance(
    models.Fetch.objects.filter(status="pending").select_related("actor"),
    "fetch_obj",
    "fetch_id",
)
def third_party_fetch(fetch_obj):
    """Resolve a pending fetch against a supported third-party service.

    The task validates the URL, asks the per-service handlers for the remote
    object type/id and its metadata, then feeds the metadata to the first
    serializer registered for that type that validates, and saves it.

    The outcome is stored on ``fetch_obj``:

    * ``errored`` with ``detail["error_code"]`` set on any failure,
    * ``skipped`` when no serializer is registered for the remote type,
    * ``finished`` with ``fetch_obj.object`` set on success.

    Args:
        fetch_obj: the pending ``Fetch`` instance (resolved from ``fetch_id``
            by the ``require_instance`` decorator).

    Raises:
        Exception: anything raised by ``serializer.save()`` is re-raised
            after the error has been recorded.
    """

    def error(code, **kwargs):
        # Persist a failure on the fetch; extra kwargs end up in ``detail``.
        fetch_obj.status = "errored"
        fetch_obj.fetch_date = timezone.now()
        fetch_obj.detail = {"error_code": code}
        fetch_obj.detail.update(kwargs)
        fetch_obj.save(update_fields=["fetch_date", "status", "detail"])

    def check_url(url):
        # Return the service domain, or None once an error has been recorded.
        if not url.startswith("webfinger://"):
            payload, updated = mrf.inbox.apply({"id": url})
            if not payload:
                error("blocked", message="Blocked by MRF")
                return None
        service = urlparse(url).netloc
        if service not in fetch_obj.supported_services:
            error("invalid_url", message=f"Unsupported domain {service}")
            return None
        return service

    url = fetch_obj.url
    actor = fetch_obj.actor
    service = check_url(url)
    if service is None:
        # check_url() already stored the failure; going on would only raise
        # a KeyError on the handler lookup below.
        return

    try:
        type_, remote_id = type_and_id_from_third_party[service](fetch_obj)
    except (ValueError, IndexError) as e:
        # Python 3 exceptions have no ``.message`` attribute; use str().
        return error("url_parse_error", message=str(e))
    logger.debug("Parsed URL %s into type %s and id %s", url, type_, remote_id)

    try:
        result, existing = metadata_from_third_party_[service](type_, remote_id)
        logger.debug(
            "Remote answered with %s and we found %s in database", result, existing
        )
    except requests.exceptions.HTTPError as e:
        response = e.response
        # Compare with None explicitly: a requests Response is falsy for
        # 4xx/5xx statuses, which is exactly the case handled here.
        has_response = response is not None
        return error(
            "http",
            status_code=response.status_code if has_response else None,
            message=response.text if has_response else str(e),
        )
    except requests.exceptions.Timeout:
        return error("timeout")
    except requests.exceptions.ConnectionError as e:
        return error("connection", message=str(e))
    except requests.RequestException as e:
        return error("request", message=str(e))
    except Exception as e:
        # Task boundary: record whatever the third-party client raised.
        return error("unhandled", message=str(e))

    try:
        serializer_classes = fetch_obj.serializers.get(type_)
    except (KeyError, AttributeError):
        serializer_classes = None
    if not serializer_classes:
        # ``dict.get`` returns None for unknown types instead of raising.
        fetch_obj.status = "skipped"
        fetch_obj.fetch_date = timezone.now()
        fetch_obj.detail = {"reason": "unhandled_type", "type": type_}
        return fetch_obj.save(update_fields=["fetch_date", "status", "detail"])

    # Keep the first serializer that validates; otherwise the last one tried
    # provides the validation errors reported below.
    serializer = None
    for serializer_class in serializer_classes:
        serializer = serializer_class(
            existing, data=result, context={"fetch_actor": actor}
        )
        if serializer.is_valid():
            break

    if serializer.errors:
        return error("validation", validation_errors=serializer.errors)

    try:
        obj = serializer.save()
    except Exception as e:
        # NOTE(review): the task runs inside transaction.atomic, so re-raising
        # presumably rolls back the status written here - confirm intended.
        error("save", message=str(e))
        raise

    fetch_obj.object = obj
    fetch_obj.status = "finished"
    fetch_obj.fetch_date = timezone.now()
    return fetch_obj.save(
        update_fields=["fetch_date", "status", "object_id", "object_content_type"]
    )
class PreserveSomeDataCollector(Collector):
"""
We need to delete everything related to an actor. Well Almost everything.

View File

@ -0,0 +1,123 @@
from rest_framework import serializers
from funkwhale_api.tags import models as tags_models
class ArtistSerializer(serializers.Serializer):
    """
    Validate a Musicbrainz artist payload and store it as a local artist.
    """

    id = serializers.CharField()
    name = serializers.CharField()

    def create(self, validated_data):
        """Create an ``Artist`` named ``name`` whose ``mbid`` is the payload ``id``."""
        from funkwhale_api.music.models import Artist

        return Artist.objects.create(
            name=validated_data["name"],
            mbid=validated_data["id"],
        )
class ArtistCreditSerializer(serializers.Serializer):
    """
    Validate a Musicbrainz artist credit entry (credited name, join phrase
    and nested artist) and store it as a local artist credit.
    """

    name = serializers.CharField()
    joinphrase = serializers.CharField(allow_blank=True)
    artist = ArtistSerializer()

    def create(self, validated_data):
        """Create the nested artist, then the ``ArtistCredit`` pointing at it."""
        from funkwhale_api.music.models import ArtistCredit

        credited_name = validated_data["name"]
        join_phrase = validated_data.get("joinphrase", "")
        credited_artist = ArtistSerializer().create(validated_data["artist"])
        return ArtistCredit.objects.create(
            credit=credited_name,
            joinphrase=join_phrase,
            artist=credited_artist,
        )
class ReleaseSerializer(serializers.Serializer):
    """
    Serializer for Musicbrainz release data.

    ``tags`` and ``date`` are optional: ``create`` and ``update`` already
    read them with a default, and release payloads do not always carry them.
    """

    id = serializers.CharField()
    title = serializers.CharField()
    artist_credit = ArtistCreditSerializer(many=True)
    # NOTE(review): Musicbrainz tag entries look like objects with a name
    # rather than plain strings - confirm the child field type.
    tags = serializers.ListField(
        child=serializers.CharField(), allow_empty=True, required=False
    )
    # Musicbrainz dates may be partial (year only, or year and month).
    date = serializers.DateField(
        input_formats=["%Y", "%Y-%m", "%Y/%m/%d", "%Y-%m-%d"], required=False
    )

    def create(self, validated_data):
        """Create an ``Album`` with its artist credits and tags.

        Returns:
            The newly created album.
        """
        from funkwhale_api.music.models import Album

        data = {
            "title": validated_data["title"],
            "mbid": validated_data["id"],
            "release_date": validated_data.get("date"),
        }
        album = Album.objects.create(**data)

        artist_credit = ArtistCreditSerializer(many=True).create(
            validated_data["artist_credit"]
        )
        album.artist_credit.set(artist_credit)
        album.save()
        tags_models.add_tags(album, *validated_data.get("tags", []))
        return album

    def update(self, instance, validated_data):
        """Refresh title, release date and tags of an existing album.

        Returns:
            The updated ``instance``.
        """
        instance.title = validated_data["title"]
        # Only overwrite the stored date when the payload provides one, so an
        # omitted optional field does not erase existing data.
        if "date" in validated_data:
            instance.release_date = validated_data["date"]
        instance.save()
        tags_models.add_tags(instance, *validated_data.get("tags", []))
        return instance
class RecordingSerializer(serializers.Serializer):
    """
    Serializer for Musicbrainz recording data, stored locally as a track.

    ``tags`` is optional: ``create`` and ``update`` already read it with a
    default, and recording payloads do not always carry it.
    """

    id = serializers.CharField()
    title = serializers.CharField()
    artist_credit = ArtistCreditSerializer(many=True)
    releases = ReleaseSerializer(many=True)
    tags = serializers.ListField(
        child=serializers.CharField(), allow_empty=True, required=False
    )

    def create(self, validated_data):
        """Create a ``Track`` attached to the recording's first release.

        Returns:
            The newly created track.

        Raises:
            serializers.ValidationError: if the recording has no release to
                use as the track's album.
        """
        from funkwhale_api.music.models import Track

        releases = validated_data.get("releases") or []
        if not releases:
            # NOTE(review): if Track.album is nullable, a release-less
            # recording could be stored without album instead - confirm.
            raise serializers.ValidationError(
                {"releases": "At least one release is required to create a track"}
            )

        # In mb a recording can have various releases, we take the first one.
        # Only that release is persisted, so the other ones do not end up as
        # unused albums in the database.
        album = ReleaseSerializer().create(releases[0])

        track = Track.objects.create(
            title=validated_data["title"],
            mbid=validated_data["id"],
            album=album,
        )

        artist_credit = ArtistCreditSerializer(many=True).create(
            validated_data["artist_credit"]
        )
        track.artist_credit.set(artist_credit)
        track.save()
        tags_models.add_tags(track, *validated_data.get("tags", []))
        return track

    def update(self, instance, validated_data):
        """Refresh title and tags of an existing track.

        Returns:
            The updated ``instance``.
        """
        instance.title = validated_data["title"]
        instance.save()
        tags_models.add_tags(instance, *validated_data.get("tags", []))
        return instance

View File

@ -732,3 +732,87 @@ def test_fetch_webfinger_create_actor(factories, r_mock, mocker):
assert init.call_args[0][1] == actor
assert init.call_args[1]["data"] == payload
assert save.call_count == 1
def test_third_party_fetch_success(factories, r_mock, mocker):
    """Run ``third_party_fetch`` on a MusicBrainz recording URL and expect success.

    A track and a fetch pointing at ``https://musicbrainz.org/recording/<mbid>``
    are created, a recording payload is registered on the request mock, then
    the task is executed and the fetch status and serializer calls are checked.
    """
    track = factories["music.Track"]()
    url = f"https://musicbrainz.org/recording/{track.mbid}"
    fetch = factories["federation.Fetch"](url=url)
    # Recording payload using the hyphenated key names of the remote service.
    payload = {
        "releases": [
            {
                "status": "Promotion",
                "id": "220ffb88-49ed-4df4-a330-46f8e7353ff0",
                "country": "DE",
                "title": "With Oi! To Hope for Myanmar 2022",
                "quality": "normal",
                "release-events": [
                    {
                        "area": {
                            "name": "Germany",
                            "id": "85752fda-13c4-31a3-bee5-0e5cb1f51dad",
                            "sort-name": "Germany",
                            "disambiguation": "",
                            "iso-3166-1-codes": ["DE"],
                        },
                        "date": "2022",
                    }
                ],
                "disambiguation": "Version aus 2022",
                "status-id": "518ffc83-5cde-34df-8627-81bff5093d92",
                "packaging-id": "8f931351-d2e2-310f-afc6-37b89ddba246",
                "artist-credit": [
                    {
                        "artist": {
                            "sort-name": "Various Artists",
                            "name": "Various Artists",
                            "disambiguation": "add compilations to this artist",
                            "type": "Other",
                            "type-id": "ac897045-5043-3294-969b-187360e45d86",
                            "id": "89ad4ac3-39f7-470e-963a-56509c546377",
                        },
                        "joinphrase": "",
                        "name": "Various Artists",
                    }
                ],
                "barcode": "",
                "date": "2022",
                "packaging": "Digipak",
                "text-representation": {"language": "mul", "script": "Latn"},
            }
        ],
        "disambiguation": "",
        "video": False,
        "first-release-date": "2022",
        "artist-credit": [
            {
                "artist": {
                    "name": "The Rebel Riot",
                    "disambiguation": "",
                    "sort-name": "Rebel Riot, The",
                    "type": "Group",
                    "id": "1ff2cd0c-2ac1-4296-b650-77ef57bb0d01",
                    "country": "MM",
                    "type-id": "e431f5f6-b5d2-343d-8b36-72607fffb74b",
                },
                "name": "The Rebel Riot",
                "joinphrase": "",
            }
        ],
        "title": "A.C.A.B.",
        "id": "455cd030-7394-4244-9a53-3b96a666b1c6",
        "length": 193253,
    }
    # Spies must be installed before the task runs so the calls are recorded.
    # NOTE(review): ``serializers`` is whatever this test module imports under
    # that name; the fetch maps "recordings" to RecordingSerializer, so confirm
    # ArtistSerializer is the class that should be spied on.
    init = mocker.spy(serializers.ArtistSerializer, "__init__")
    save = mocker.spy(serializers.ArtistSerializer, "save")
    # NOTE(review): this registers the website URL; presumably the MusicBrainz
    # API client requests a different endpoint - verify the mock is hit.
    r_mock.get(url, json=payload)
    tasks.third_party_fetch(fetch_id=fetch.pk)
    fetch.refresh_from_db()
    assert fetch.status == "finished"
    assert init.call_count == 1
    # NOTE(review): the task rewrites "-" to "_" in payload keys before
    # building the serializer, so equality with the raw payload may not hold.
    assert init.call_args[1]["data"] == payload
    assert save.call_count == 1