WIP:newfeature(backend):fetch musicbrainz metadata from search bar
This commit is contained in:
parent
ad9a829af6
commit
ad91036d51
|
@ -1,3 +1,5 @@
|
|||
from urllib.parse import urlparse
|
||||
|
||||
import requests.exceptions
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
|
@ -252,11 +254,18 @@ class FetchViewSet(
|
|||
if fetch.status == "finished":
|
||||
# a duplicate was returned, no need to fetch again
|
||||
return
|
||||
if settings.FEDERATION_SYNCHRONOUS_FETCH:
|
||||
tasks.fetch(fetch_id=fetch.pk)
|
||||
|
||||
parsed_url = urlparse(fetch.url)
|
||||
domain = parsed_url.netloc
|
||||
if domain in fetch.supported_services:
|
||||
tasks.third_party_fetch(fetch_id=fetch.pk)
|
||||
fetch.refresh_from_db()
|
||||
else:
|
||||
common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk)
|
||||
if settings.FEDERATION_SYNCHRONOUS_FETCH:
|
||||
tasks.fetch(fetch_id=fetch.pk)
|
||||
fetch.refresh_from_db()
|
||||
else:
|
||||
common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk)
|
||||
|
||||
|
||||
class DomainViewSet(
|
||||
|
|
|
@ -359,6 +359,22 @@ CONTEXTS = [
|
|||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
"shortId": "MB",
|
||||
"contextUrl": None,
|
||||
"documentUrl": "http://musicbrainz.org/ns/mmd-1.0#",
|
||||
"document": {
|
||||
"@context": {
|
||||
"mb": "http://musicbrainz.org/ns/mmd-1.0#",
|
||||
"schema": "http://schema.org#",
|
||||
"Recording": "schema:MusicRecording",
|
||||
"name": "schema:name",
|
||||
"duration": "schema:duration",
|
||||
"@id": "@id",
|
||||
"@type": "@type",
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
CONTEXTS_BY_ID = {c["shortId"]: c for c in CONTEXTS}
|
||||
|
@ -392,3 +408,4 @@ SEC = NS(CONTEXTS_BY_ID["SEC"])
|
|||
FW = NS(CONTEXTS_BY_ID["FW"])
|
||||
SC = NS(CONTEXTS_BY_ID["SC"])
|
||||
LITEPUB = NS(CONTEXTS_BY_ID["LITEPUB"])
|
||||
MB = NS(CONTEXTS_BY_ID["MB"])
|
||||
|
|
|
@ -18,6 +18,7 @@ from funkwhale_api.common import session
|
|||
from funkwhale_api.common import utils as common_utils
|
||||
from funkwhale_api.common import validators as common_validators
|
||||
from funkwhale_api.music import utils as music_utils
|
||||
from funkwhale_api.musicbrainz import serializers as musicbrainz_serializers
|
||||
|
||||
from . import utils as federation_utils
|
||||
|
||||
|
@ -411,8 +412,14 @@ class Fetch(models.Model):
|
|||
contexts.AS.Organization: [serializers.ActorSerializer],
|
||||
contexts.AS.Service: [serializers.ActorSerializer],
|
||||
contexts.AS.Application: [serializers.ActorSerializer],
|
||||
# For MusicBrainz, the key must be the name of the musicbrainz.api namespace (e.g. "recordings").
|
||||
"recordings": [musicbrainz_serializers.RecordingSerializer],
|
||||
}
|
||||
|
||||
@property
def supported_services(self):
    """Return the third-party domains whose URLs can be fetched.

    Callers compare the network location of a fetch URL against this list
    to decide whether the third-party fetch path applies.
    """
    services = ["musicbrainz.org"]
    return services
|
||||
|
||||
|
||||
class InboxItem(models.Model):
|
||||
"""
|
||||
|
|
|
@ -2,6 +2,8 @@ import datetime
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from django.conf import settings
|
||||
|
@ -13,6 +15,7 @@ from django.utils import timezone
|
|||
from dynamic_preferences.registries import global_preferences_registry
|
||||
from requests.exceptions import RequestException
|
||||
|
||||
from funkwhale_api import musicbrainz
|
||||
from funkwhale_api.audio import models as audio_models
|
||||
from funkwhale_api.common import models as common_models
|
||||
from funkwhale_api.common import preferences, session
|
||||
|
@ -456,6 +459,137 @@ def fetch(fetch_obj):
|
|||
)
|
||||
|
||||
|
||||
def musicbrainz_type_handler(fetch):
    """Extract the MusicBrainz API namespace and MBID from a fetch URL.

    The URL path is expected to look like ``/<entity>/<mbid>``, for example
    ``/recording/455cd030-7394-4244-9a53-3b96a666b1c6``.

    Args:
        fetch: object exposing the URL to parse as ``fetch.url``.

    Returns:
        A ``(type_, mbid)`` tuple where ``type_`` is the first path segment
        with an ``"s"`` appended (``"recording"`` -> ``"recordings"``) and
        ``mbid`` is the second path segment.

    Raises:
        ValueError: if the path has fewer than two segments or the second
            segment is not a valid UUID.
    """
    url = fetch.url
    path_parts = urlparse(url).path.strip("/").split("/")
    # A bare domain or a path with a single segment used to surface as an
    # IndexError, which callers catching ValueError would not handle.
    if len(path_parts) < 2 or not path_parts[0]:
        raise ValueError(f"could not get type and mbid from url {url}")

    entity, mbid = path_parts[0], path_parts[1]
    try:
        uuid.UUID(mbid)
    except ValueError as err:
        raise ValueError(f"could not get mbid from url {url}") from err

    # Naive pluralisation: only correct for entities whose plural is "<name>s".
    return entity + "s", mbid
|
||||
|
||||
|
||||
def musicbrainz_metadata_handler(type_, id):
    """Fetch MusicBrainz metadata for an entity and look up a local match.

    Args:
        type_: name of the ``musicbrainz.api`` attribute to query
            (e.g. ``"recordings"``).
        id: MusicBrainz identifier passed to the API and used for the
            local lookup.

    Returns:
        A ``(result, existing)`` tuple. ``result`` is the API response with
        every ``-`` in dictionary keys replaced by ``_`` (recursively).
        ``existing`` is the first local ``Track`` with this ``mbid``, or
        ``None`` when there is none.
    """

    def replace_hyphens_in_keys(obj):
        # Recursively rewrite dict keys such as "artist-credit" to
        # "artist_credit"; values that are not dicts/lists are untouched.
        if isinstance(obj, dict):
            return {
                k.replace("-", "_"): replace_hyphens_in_keys(v) for k, v in obj.items()
            }
        elif isinstance(obj, list):
            return [replace_hyphens_in_keys(item) for item in obj]
        else:
            return obj

    raw = getattr(musicbrainz.api, type_).get(
        id=id, includes=["tags", "artists", "releases"]
    )
    result = replace_hyphens_in_keys(raw)

    # QuerySet.first() already returns None when nothing matches, so a
    # preliminary exists() query is unnecessary.
    # NOTE(review): the lookup is always against Track, whatever type_ is;
    # confirm this is intended once other entity types are supported.
    existing = music_models.Track.objects.filter(mbid=id).first()
    return result, existing
|
||||
|
||||
|
||||
# Handler registries keyed by the network location (domain) of the fetch URL.
# type_and_id_from_third_party: handler(fetch) -> (type_, id) parsed from the URL.
type_and_id_from_third_party = {"musicbrainz.org": musicbrainz_type_handler}
# metadata_from_third_party_: handler(type_, id) -> (remote metadata, existing local object or None).
metadata_from_third_party_ = {"musicbrainz.org": musicbrainz_metadata_handler}
|
||||
|
||||
|
||||
@celery.app.task(name="third_party_fetch")
@transaction.atomic
@celery.require_instance(
    models.Fetch.objects.filter(status="pending").select_related("actor"),
    "fetch_obj",
    "fetch_id",
)
def third_party_fetch(fetch_obj):
    """Fetch metadata for ``fetch_obj.url`` from a supported third-party service.

    The outcome is persisted on ``fetch_obj``:

    * ``status="errored"`` with ``detail["error_code"]`` set to one of
      ``blocked``, ``invalid_url``, ``url_parse_error``, ``http``,
      ``timeout``, ``connection``, ``request``, ``unhandled``,
      ``validation`` or ``save``;
    * ``status="skipped"`` when no serializer is registered for the parsed type;
    * ``status="finished"`` with ``fetch_obj.object`` set to the saved object.

    Args:
        fetch_obj: the pending ``Fetch`` instance resolved from ``fetch_id``
            by ``celery.require_instance``.

    Raises:
        Exception: whatever ``serializer.save()`` raises is re-raised after
            the error has been recorded.
    """

    def error(code, **kwargs):
        # Mark the fetch as errored; extra kwargs are merged into the detail.
        fetch_obj.status = "errored"
        fetch_obj.fetch_date = timezone.now()
        fetch_obj.detail = {"error_code": code}
        fetch_obj.detail.update(kwargs)
        fetch_obj.save(update_fields=["fetch_date", "status", "detail"])

    def check_url(url):
        # Return the service domain, or None after recording an error.
        if not url.startswith("webfinger://"):
            payload, _ = mrf.inbox.apply({"id": url})
            if not payload:
                return error("blocked", message="Blocked by MRF")

        service = urlparse(url).netloc
        if service not in fetch_obj.supported_services:
            return error("invalid_url", message=f"Unsupported domain {service}")
        return service

    url = fetch_obj.url
    actor = fetch_obj.actor
    service = check_url(url)
    if service is None:
        # check_url() already recorded the error; continuing would fail on
        # the registry lookup below with an unrelated KeyError.
        return

    try:
        type_, id = type_and_id_from_third_party[service](fetch_obj)
        logger.debug("Parsed URL %s into type %s and id %s", url, type_, id)
    except ValueError as e:
        # Python 3 exceptions have no ``.message`` attribute.
        return error("url_parse_error", message=str(e))

    try:
        result, existing = metadata_from_third_party_[service](type_, id)
        logger.debug(
            "Remote answered with %s and we found %s in database", result, existing
        )
    except requests.exceptions.HTTPError as e:
        # A requests Response is falsy for 4xx/5xx statuses, so compare with
        # None explicitly instead of relying on truthiness.
        response = e.response
        has_response = response is not None
        return error(
            "http",
            status_code=response.status_code if has_response else None,
            message=response.text if has_response else str(e),
        )
    except requests.exceptions.Timeout:
        return error("timeout")
    except requests.exceptions.ConnectionError as e:
        return error("connection", message=str(e))
    except requests.RequestException as e:
        return error("request", message=str(e))
    except Exception as e:
        return error("unhandled", message=str(e))

    try:
        serializer_classes = fetch_obj.serializers.get(type_)
    except (KeyError, AttributeError):
        serializer_classes = None
    # ``.get()`` returns None for an unknown type instead of raising, so the
    # skip has to be decided on the value, not only on an exception.
    if not serializer_classes:
        fetch_obj.status = "skipped"
        fetch_obj.fetch_date = timezone.now()
        fetch_obj.detail = {"reason": "unhandled_type", "type": type_}
        return fetch_obj.save(update_fields=["fetch_date", "status", "detail"])

    # Keep the first serializer that validates; otherwise the last one tried
    # is kept so that its errors can be reported.
    serializer = None
    for serializer_class in serializer_classes:
        serializer = serializer_class(
            existing, data=result, context={"fetch_actor": actor}
        )
        if serializer.is_valid():
            break

    if serializer.errors:
        return error("validation", validation_errors=serializer.errors)

    try:
        obj = serializer.save()
    except Exception as e:
        # NOTE(review): the task runs under transaction.atomic, so re-raising
        # presumably rolls back the error state saved here - confirm.
        error("save", message=str(e))
        raise

    fetch_obj.object = obj
    fetch_obj.status = "finished"
    fetch_obj.fetch_date = timezone.now()
    return fetch_obj.save(
        update_fields=["fetch_date", "status", "object_id", "object_content_type"]
    )
|
||||
|
||||
|
||||
class PreserveSomeDataCollector(Collector):
|
||||
"""
|
||||
We need to delete everything related to an actor. Well… Almost everything.
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
from rest_framework import serializers
|
||||
|
||||
from funkwhale_api.tags import models as tags_models
|
||||
|
||||
|
||||
class ArtistSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz artist payload and create the matching Artist.
    """

    id = serializers.CharField()
    name = serializers.CharField()

    def create(self, validated_data):
        """Create and return an Artist named after the payload, with ``mbid`` set to its ``id``."""
        # Imported at call time, as in the rest of this module.
        from funkwhale_api.music.models import Artist

        return Artist.objects.create(
            name=validated_data["name"],
            mbid=validated_data["id"],
        )
|
||||
|
||||
|
||||
class ArtistCreditSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz artist-credit entry (credited name, join phrase
    and nested artist) and create the matching ArtistCredit.
    """

    name = serializers.CharField()
    joinphrase = serializers.CharField(allow_blank=True)
    artist = ArtistSerializer()

    def create(self, validated_data):
        """Create the nested Artist, then the ArtistCredit that points to it."""
        # Imported at call time, as in the rest of this module.
        from funkwhale_api.music.models import ArtistCredit

        credited_name = validated_data["name"]
        join_phrase = validated_data.get("joinphrase", "")
        # A new Artist row is created on every call.
        credited_artist = ArtistSerializer().create(validated_data["artist"])
        return ArtistCredit.objects.create(
            credit=credited_name,
            joinphrase=join_phrase,
            artist=credited_artist,
        )
|
||||
|
||||
|
||||
class ReleaseSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz release payload and create or update the
    matching Album.
    """

    id = serializers.CharField()
    title = serializers.CharField()
    artist_credit = ArtistCreditSerializer(many=True)
    tags = serializers.ListField(child=serializers.CharField(), allow_empty=True)
    date = serializers.DateField(input_formats=["%Y", "%Y/%m/%d", "%Y-%m-%d"])

    def create(self, validated_data):
        """Create an Album with its artist credits and tags, and return it."""
        # Imported at call time, as in the rest of this module.
        from funkwhale_api.music.models import Album

        album = Album.objects.create(
            title=validated_data["title"],
            mbid=validated_data["id"],
            release_date=validated_data.get("date"),
        )

        credits = ArtistCreditSerializer(many=True).create(
            validated_data["artist_credit"]
        )
        album.artist_credit.set(credits)
        album.save()

        tag_names = validated_data.get("tags", [])
        tags_models.add_tags(album, *tag_names)
        return album

    def update(self, instance, validated_data):
        """Refresh the title and release date of ``instance`` and add the payload tags."""
        instance.title = validated_data["title"]
        instance.release_date = validated_data.get("date")
        instance.save()

        tag_names = validated_data.get("tags", [])
        tags_models.add_tags(instance, *tag_names)
        return instance
|
||||
|
||||
|
||||
class RecordingSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz recording payload and create or update the
    matching Track.
    """

    id = serializers.CharField()
    title = serializers.CharField()
    artist_credit = ArtistCreditSerializer(many=True)
    releases = ReleaseSerializer(many=True)
    tags = serializers.ListField(child=serializers.CharField(), allow_empty=True)

    def create(self, validated_data):
        """Create a Track with its album, artist credits and tags.

        Args:
            validated_data: validated recording payload.

        Returns:
            The newly created Track.
        """
        from funkwhale_api.music.models import Track

        # In MusicBrainz a recording can belong to several releases; only the
        # first one is used as the album. Create just that one rather than
        # persisting an Album for every release and discarding the rest.
        releases = validated_data["releases"]
        # NOTE(review): with no release the track is created without an
        # album - this assumes Track.album is nullable; confirm.
        album = ReleaseSerializer().create(releases[0]) if releases else None

        track = Track.objects.create(
            title=validated_data["title"],
            mbid=validated_data["id"],
            album=album,
        )
        artist_credit = ArtistCreditSerializer(many=True).create(
            validated_data["artist_credit"]
        )
        track.artist_credit.set(artist_credit)
        track.save()

        tags_models.add_tags(track, *validated_data.get("tags", []))

        return track

    def update(self, instance, validated_data):
        """Refresh the title of ``instance`` and add the payload tags.

        Returns:
            The updated instance.
        """
        instance.title = validated_data["title"]
        instance.save()
        tags_models.add_tags(instance, *validated_data.get("tags", []))

        return instance
|
|
@ -732,3 +732,87 @@ def test_fetch_webfinger_create_actor(factories, r_mock, mocker):
|
|||
assert init.call_args[0][1] == actor
|
||||
assert init.call_args[1]["data"] == payload
|
||||
assert save.call_count == 1
|
||||
|
||||
|
||||
def test_third_party_fetch_success(factories, r_mock, mocker):
    """Run third_party_fetch on a MusicBrainz recording URL and expect it to finish."""
    # The fetch URL reuses the mbid of a track created by the factory.
    track = factories["music.Track"]()
    url = f"https://musicbrainz.org/recording/{track.mbid}"
    fetch = factories["federation.Fetch"](url=url)
    # Recording payload using hyphenated keys ("artist-credit", ...).
    payload = {
        "releases": [
            {
                "status": "Promotion",
                "id": "220ffb88-49ed-4df4-a330-46f8e7353ff0",
                "country": "DE",
                "title": "With Oi! To Hope for Myanmar 2022",
                "quality": "normal",
                "release-events": [
                    {
                        "area": {
                            "name": "Germany",
                            "id": "85752fda-13c4-31a3-bee5-0e5cb1f51dad",
                            "sort-name": "Germany",
                            "disambiguation": "",
                            "iso-3166-1-codes": ["DE"],
                        },
                        "date": "2022",
                    }
                ],
                "disambiguation": "Version aus 2022",
                "status-id": "518ffc83-5cde-34df-8627-81bff5093d92",
                "packaging-id": "8f931351-d2e2-310f-afc6-37b89ddba246",
                "artist-credit": [
                    {
                        "artist": {
                            "sort-name": "Various Artists",
                            "name": "Various Artists",
                            "disambiguation": "add compilations to this artist",
                            "type": "Other",
                            "type-id": "ac897045-5043-3294-969b-187360e45d86",
                            "id": "89ad4ac3-39f7-470e-963a-56509c546377",
                        },
                        "joinphrase": "",
                        "name": "Various Artists",
                    }
                ],
                "barcode": "",
                "date": "2022",
                "packaging": "Digipak",
                "text-representation": {"language": "mul", "script": "Latn"},
            }
        ],
        "disambiguation": "",
        "video": False,
        "first-release-date": "2022",
        "artist-credit": [
            {
                "artist": {
                    "name": "The Rebel Riot",
                    "disambiguation": "",
                    "sort-name": "Rebel Riot, The",
                    "type": "Group",
                    "id": "1ff2cd0c-2ac1-4296-b650-77ef57bb0d01",
                    "country": "MM",
                    "type-id": "e431f5f6-b5d2-343d-8b36-72607fffb74b",
                },
                "name": "The Rebel Riot",
                "joinphrase": "",
            }
        ],
        "title": "A.C.A.B.",
        "id": "455cd030-7394-4244-9a53-3b96a666b1c6",
        "length": 193253,
    }
    # NOTE(review): the module bound to `serializers` is not visible here;
    # confirm that ArtistSerializer is the class third_party_fetch actually
    # instantiates for a recording URL.
    init = mocker.spy(serializers.ArtistSerializer, "__init__")
    save = mocker.spy(serializers.ArtistSerializer, "save")

    # NOTE(review): this mocks the musicbrainz.org web URL itself; verify that
    # the metadata lookup performed by the task requests this exact URL.
    r_mock.get(url, json=payload)

    tasks.third_party_fetch(fetch_id=fetch.pk)

    # Reload to observe the state persisted by the task.
    fetch.refresh_from_db()

    assert fetch.status == "finished"
    assert init.call_count == 1
    # NOTE(review): payload keys are hyphenated; if the task normalises keys
    # before building the serializer this equality may not hold - confirm.
    assert init.call_args[1]["data"] == payload
    assert save.call_count == 1
|
||||
|
|
Loading…
Reference in New Issue