WIP:newfeature(backend):fetch musicbrainz metadata from search bar
This commit is contained in:
parent
87e7297fae
commit
e1445c5637
|
@ -1,3 +1,5 @@
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import requests.exceptions
|
import requests.exceptions
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
@ -252,11 +254,18 @@ class FetchViewSet(
|
||||||
if fetch.status == "finished":
|
if fetch.status == "finished":
|
||||||
# a duplicate was returned, no need to fetch again
|
# a duplicate was returned, no need to fetch again
|
||||||
return
|
return
|
||||||
if settings.FEDERATION_SYNCHRONOUS_FETCH:
|
|
||||||
tasks.fetch(fetch_id=fetch.pk)
|
parsed_url = urlparse(fetch.url)
|
||||||
|
domain = parsed_url.netloc
|
||||||
|
if domain in fetch.supported_services:
|
||||||
|
tasks.third_party_fetch(fetch_id=fetch.pk)
|
||||||
fetch.refresh_from_db()
|
fetch.refresh_from_db()
|
||||||
else:
|
else:
|
||||||
common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk)
|
if settings.FEDERATION_SYNCHRONOUS_FETCH:
|
||||||
|
tasks.fetch(fetch_id=fetch.pk)
|
||||||
|
fetch.refresh_from_db()
|
||||||
|
else:
|
||||||
|
common_utils.on_commit(tasks.fetch.delay, fetch_id=fetch.pk)
|
||||||
|
|
||||||
|
|
||||||
class DomainViewSet(
|
class DomainViewSet(
|
||||||
|
|
|
@ -359,6 +359,22 @@ CONTEXTS = [
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"shortId": "MB",
|
||||||
|
"contextUrl": None,
|
||||||
|
"documentUrl": "http://musicbrainz.org/ns/mmd-1.0#",
|
||||||
|
"document": {
|
||||||
|
"@context": {
|
||||||
|
"mb": "http://musicbrainz.org/ns/mmd-1.0#",
|
||||||
|
"schema": "http://schema.org#",
|
||||||
|
"Recording": "schema:MusicRecording",
|
||||||
|
"name": "schema:name",
|
||||||
|
"duration": "schema:duration",
|
||||||
|
"@id": "@id",
|
||||||
|
"@type": "@type",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
CONTEXTS_BY_ID = {c["shortId"]: c for c in CONTEXTS}
|
CONTEXTS_BY_ID = {c["shortId"]: c for c in CONTEXTS}
|
||||||
|
@ -392,3 +408,4 @@ SEC = NS(CONTEXTS_BY_ID["SEC"])
|
||||||
FW = NS(CONTEXTS_BY_ID["FW"])
|
FW = NS(CONTEXTS_BY_ID["FW"])
|
||||||
SC = NS(CONTEXTS_BY_ID["SC"])
|
SC = NS(CONTEXTS_BY_ID["SC"])
|
||||||
LITEPUB = NS(CONTEXTS_BY_ID["LITEPUB"])
|
LITEPUB = NS(CONTEXTS_BY_ID["LITEPUB"])
|
||||||
|
MB = NS(CONTEXTS_BY_ID["MB"])
|
||||||
|
|
|
@ -18,6 +18,7 @@ from funkwhale_api.common import session
|
||||||
from funkwhale_api.common import utils as common_utils
|
from funkwhale_api.common import utils as common_utils
|
||||||
from funkwhale_api.common import validators as common_validators
|
from funkwhale_api.common import validators as common_validators
|
||||||
from funkwhale_api.music import utils as music_utils
|
from funkwhale_api.music import utils as music_utils
|
||||||
|
from funkwhale_api.musicbrainz import serializers as musicbrainz_serializers
|
||||||
|
|
||||||
from . import utils as federation_utils
|
from . import utils as federation_utils
|
||||||
|
|
||||||
|
@ -411,8 +412,14 @@ class Fetch(models.Model):
|
||||||
contexts.AS.Organization: [serializers.ActorSerializer],
|
contexts.AS.Organization: [serializers.ActorSerializer],
|
||||||
contexts.AS.Service: [serializers.ActorSerializer],
|
contexts.AS.Service: [serializers.ActorSerializer],
|
||||||
contexts.AS.Application: [serializers.ActorSerializer],
|
contexts.AS.Application: [serializers.ActorSerializer],
|
||||||
|
# for mb the key must be the api namespace
|
||||||
|
"recordings": [musicbrainz_serializers.RecordingSerializer],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@property
def supported_services(self):
    """Return the third-party domains whose URLs can be fetched.

    A new list is built on every access.
    """
    domains = ["musicbrainz.org"]
    return domains
|
||||||
|
|
||||||
|
|
||||||
class InboxItem(models.Model):
|
class InboxItem(models.Model):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -2,6 +2,8 @@ import datetime
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import uuid
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
@ -13,6 +15,7 @@ from django.utils import timezone
|
||||||
from dynamic_preferences.registries import global_preferences_registry
|
from dynamic_preferences.registries import global_preferences_registry
|
||||||
from requests.exceptions import RequestException
|
from requests.exceptions import RequestException
|
||||||
|
|
||||||
|
from funkwhale_api import musicbrainz
|
||||||
from funkwhale_api.audio import models as audio_models
|
from funkwhale_api.audio import models as audio_models
|
||||||
from funkwhale_api.common import models as common_models
|
from funkwhale_api.common import models as common_models
|
||||||
from funkwhale_api.common import preferences, session
|
from funkwhale_api.common import preferences, session
|
||||||
|
@ -456,6 +459,137 @@ def fetch(fetch_obj):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def musicbrainz_type_handler(fetch):
    """Extract the MusicBrainz API namespace and MBID from a fetch URL.

    The URL path is expected to look like ``/<entity>/<mbid>``, for example
    ``/recording/455cd030-7394-4244-9a53-3b96a666b1c6``. The entity name is
    pluralised by appending ``"s"`` and the second path segment must parse
    as a UUID.

    Args:
        fetch: object exposing the URL to parse as ``fetch.url``.

    Returns:
        A ``(type_, mbid)`` tuple of strings, e.g.
        ``("recordings", "455cd030-...")``.

    Raises:
        ValueError: if the path has fewer than two segments, or if the
            second segment is not a valid UUID.
    """
    url = fetch.url
    path_parts = urlparse(url).path.strip("/").split("/")
    # Guard against short paths: indexing blindly would raise IndexError,
    # which callers handling ValueError would not catch.
    if len(path_parts) < 2:
        raise ValueError(f"could not get type and mbid from url {url}")

    entity, mbid = path_parts[0], path_parts[1]
    try:
        uuid.UUID(mbid)
    except ValueError as exc:
        raise ValueError(f"could not get mbid from url {url}") from exc
    return entity + "s", mbid
|
||||||
|
|
||||||
|
|
||||||
|
def musicbrainz_metadata_handler(type_, id):
    """Fetch an entity from the MusicBrainz API and look up a local match.

    Args:
        type_: name of the attribute on ``musicbrainz.api`` to query
            (e.g. ``"recordings"``).
        id: MusicBrainz identifier of the entity. The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        A ``(result, existing)`` tuple. ``result`` is the API payload with
        every ``-`` in dict keys replaced by ``_`` at all nesting levels.
        ``existing`` is the first ``Track`` whose ``mbid`` equals ``id``,
        or ``None`` when there is none.
    """

    def replace_hyphens_in_keys(obj):
        # Recursively rewrite dict keys; values that are neither dicts nor
        # lists are returned untouched.
        if isinstance(obj, dict):
            return {
                k.replace("-", "_"): replace_hyphens_in_keys(v) for k, v in obj.items()
            }
        if isinstance(obj, list):
            return [replace_hyphens_in_keys(item) for item in obj]
        return obj

    raw = getattr(musicbrainz.api, type_).get(
        id=id, includes=["tags", "artists", "releases"]
    )
    result = replace_hyphens_in_keys(raw)

    # ``first()`` already yields None when nothing matches, so a separate
    # ``exists()`` query is unnecessary.
    # NOTE(review): the lookup always targets Track, whatever ``type_`` is —
    # confirm once other entity types are supported.
    existing = music_models.Track.objects.filter(mbid=id).first()
    return result, existing
|
||||||
|
|
||||||
|
|
||||||
|
# Per-service dispatch tables, keyed by the network location of the fetch URL.
# Each entry maps a domain to a callable that takes the fetch object and
# returns a ``(type_, id)`` pair.
type_and_id_from_third_party = {"musicbrainz.org": musicbrainz_type_handler}
# Each entry maps a domain to a callable that takes ``(type_, id)`` and
# returns a ``(result, existing)`` pair.
metadata_from_third_party_ = {"musicbrainz.org": musicbrainz_metadata_handler}
|
||||||
|
|
||||||
|
|
||||||
|
@celery.app.task(name="third_party_fetch")
@transaction.atomic
@celery.require_instance(
    models.Fetch.objects.filter(status="pending").select_related("actor"),
    "fetch_obj",
    "fetch_id",
)
def third_party_fetch(fetch_obj):
    """Resolve a fetch whose URL points at a supported third-party service.

    Steps:
      1. Validate the URL (MRF policy, then supported domain).
      2. Ask the service-specific handler for the entity type and id.
      3. Retrieve the remote metadata and any matching local object.
      4. Validate the metadata with the serializers registered for the type
         and save the resulting object on the fetch.

    Every failure is recorded on ``fetch_obj`` (``status`` set to
    ``"errored"`` or ``"skipped"`` plus a ``detail`` dict) and the task
    returns ``None``. The only exception that propagates is one raised by
    ``serializer.save()``, which is re-raised after being recorded.

    Args:
        fetch_obj: the ``Fetch`` instance to process.
    """

    def error(code, **kwargs):
        # Persist an "errored" outcome together with its error code and any
        # extra details supplied by the caller.
        fetch_obj.status = "errored"
        fetch_obj.fetch_date = timezone.now()
        fetch_obj.detail = {"error_code": code}
        fetch_obj.detail.update(kwargs)
        fetch_obj.save(update_fields=["fetch_date", "status", "detail"])

    def check_url(url):
        # Return the service domain, or None once an error has been recorded.
        if not url.startswith("webfinger://"):
            payload, _ = mrf.inbox.apply({"id": url})
            if not payload:
                error("blocked", message="Blocked by MRF")
                return None

        service = urlparse(url).netloc
        if service not in fetch_obj.supported_services:
            error("invalid_url", message=f"Unsupported domain {service}")
            return None
        return service

    url = fetch_obj.url
    actor = fetch_obj.actor
    service = check_url(url)
    if service is None:
        # The error is already stored; continuing would fail on the
        # dispatch-table lookup below.
        return None

    try:
        type_, id = type_and_id_from_third_party[service](fetch_obj)
        logger.debug("Parsed URL %s into type %s and id %s", url, type_, id)
    except (ValueError, IndexError) as e:
        # Python 3 exceptions have no ``.message`` attribute.
        return error("url_parse_error", message=str(e))

    try:
        result, existing = metadata_from_third_party_[service](type_, id)
        logger.debug(
            "Remote answered with %s and we found %s in database", result, existing
        )
    except requests.exceptions.HTTPError as e:
        # Compare against None explicitly: a requests Response is falsy for
        # 4xx/5xx statuses, which is exactly the case handled here.
        response = e.response
        return error(
            "http",
            status_code=response.status_code if response is not None else None,
            message=response.text if response is not None else str(e),
        )
    except requests.exceptions.Timeout:
        return error("timeout")
    except requests.exceptions.ConnectionError as e:
        return error("connection", message=str(e))
    except requests.RequestException as e:
        return error("request", message=str(e))
    except Exception as e:
        return error("unhandled", message=str(e))

    try:
        serializer_classes = fetch_obj.serializers.get(type_)
    except (KeyError, AttributeError):
        serializer_classes = None
    if not serializer_classes:
        # ``dict.get`` returns None for unknown types rather than raising,
        # so the "skipped" outcome has to be triggered explicitly.
        fetch_obj.status = "skipped"
        fetch_obj.fetch_date = timezone.now()
        fetch_obj.detail = {"reason": "unhandled_type", "type": type_}
        return fetch_obj.save(update_fields=["fetch_date", "status", "detail"])

    # Try each candidate serializer in order and keep the first valid one;
    # if none validates, the errors of the last one are reported.
    serializer = None
    for serializer_class in serializer_classes:
        serializer = serializer_class(
            existing, data=result, context={"fetch_actor": actor}
        )
        if serializer.is_valid():
            break

    if serializer.errors:
        return error("validation", validation_errors=serializer.errors)

    try:
        obj = serializer.save()
    except Exception as e:
        # NOTE(review): the task runs inside ``transaction.atomic``, so
        # re-raising presumably rolls this error record back — confirm.
        error("save", message=str(e))
        raise

    fetch_obj.object = obj
    fetch_obj.status = "finished"
    fetch_obj.fetch_date = timezone.now()
    return fetch_obj.save(
        update_fields=["fetch_date", "status", "object_id", "object_content_type"]
    )
|
||||||
|
|
||||||
|
|
||||||
class PreserveSomeDataCollector(Collector):
|
class PreserveSomeDataCollector(Collector):
|
||||||
"""
|
"""
|
||||||
We need to delete everything related to an actor. Well… Almost everything.
|
We need to delete everything related to an actor. Well… Almost everything.
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
from rest_framework import serializers
|
||||||
|
|
||||||
|
from funkwhale_api.tags import models as tags_models
|
||||||
|
|
||||||
|
|
||||||
|
class ArtistSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz artist payload and persist it as a local artist.
    """

    id = serializers.CharField()
    name = serializers.CharField()

    def create(self, validated_data):
        """
        Create an ``Artist`` named after ``name`` with ``id`` stored as ``mbid``.
        """
        # Imported at call time, as in the original (presumably to avoid a
        # circular import — confirm).
        from funkwhale_api.music.models import Artist

        return Artist.objects.create(
            name=validated_data["name"],
            mbid=validated_data["id"],
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ArtistCreditSerializer(serializers.Serializer):
    """
    Validate one MusicBrainz artist-credit entry: the credited name, the join
    phrase and the nested artist.
    """

    name = serializers.CharField()
    joinphrase = serializers.CharField(allow_blank=True)
    artist = ArtistSerializer()

    def create(self, validated_data):
        """
        Create the nested artist, then the ``ArtistCredit`` that refers to it.
        """
        from funkwhale_api.music.models import ArtistCredit

        credited_name = validated_data["name"]
        join_phrase = validated_data.get("joinphrase", "")
        credited_artist = ArtistSerializer().create(validated_data["artist"])
        return ArtistCredit.objects.create(
            credit=credited_name,
            joinphrase=join_phrase,
            artist=credited_artist,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ReleaseSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz release payload and store it as a local album.
    """

    id = serializers.CharField()
    title = serializers.CharField()
    artist_credit = ArtistCreditSerializer(many=True)
    tags = serializers.ListField(child=serializers.CharField(), allow_empty=True)
    date = serializers.DateField(input_formats=["%Y", "%Y/%m/%d", "%Y-%m-%d"])

    def create(self, validated_data):
        """
        Create an ``Album``, attach its artist credits and add its tags.
        """
        from funkwhale_api.music.models import Album

        album = Album.objects.create(
            title=validated_data["title"],
            mbid=validated_data["id"],
            release_date=validated_data.get("date"),
        )
        credits = ArtistCreditSerializer(many=True).create(
            validated_data["artist_credit"]
        )
        album.artist_credit.set(credits)
        album.save()

        tag_names = validated_data.get("tags", [])
        tags_models.add_tags(album, *tag_names)
        return album

    def update(self, instance, validated_data):
        """
        Refresh title and release date of ``instance`` and add its tags.
        """
        instance.title = validated_data["title"]
        instance.release_date = validated_data.get("date")
        instance.save()

        tag_names = validated_data.get("tags", [])
        tags_models.add_tags(instance, *tag_names)
        return instance
|
||||||
|
|
||||||
|
|
||||||
|
class RecordingSerializer(serializers.Serializer):
    """
    Validate a MusicBrainz recording payload and store it as a local track.
    """

    id = serializers.CharField()
    title = serializers.CharField()
    artist_credit = ArtistCreditSerializer(many=True)
    releases = ReleaseSerializer(many=True)
    tags = serializers.ListField(child=serializers.CharField(), allow_empty=True)

    def create(self, validated_data):
        """
        Create a ``Track`` with its album, artist credits and tags.

        A MusicBrainz recording can appear on several releases; only the
        first one is turned into an album. The other releases are not
        persisted, which avoids creating albums that no track refers to.
        """
        from funkwhale_api.music.models import Track

        releases = validated_data["releases"]
        # An empty release list is handled explicitly instead of failing
        # with IndexError.
        # NOTE(review): assumes Track.album accepts None — confirm.
        album = ReleaseSerializer().create(releases[0]) if releases else None

        track = Track.objects.create(
            title=validated_data["title"],
            mbid=validated_data["id"],
            album=album,
        )
        artist_credit = ArtistCreditSerializer(many=True).create(
            validated_data["artist_credit"]
        )
        track.artist_credit.set(artist_credit)
        track.save()

        tags_models.add_tags(track, *validated_data.get("tags", []))

        return track

    def update(self, instance, validated_data):
        """
        Refresh the title of ``instance`` and add its tags.
        """
        instance.title = validated_data["title"]
        instance.save()
        tags_models.add_tags(instance, *validated_data.get("tags", []))

        return instance
|
|
@ -732,3 +732,87 @@ def test_fetch_webfinger_create_actor(factories, r_mock, mocker):
|
||||||
assert init.call_args[0][1] == actor
|
assert init.call_args[0][1] == actor
|
||||||
assert init.call_args[1]["data"] == payload
|
assert init.call_args[1]["data"] == payload
|
||||||
assert save.call_count == 1
|
assert save.call_count == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_third_party_fetch_success(factories, r_mock, mocker):
    """Run ``third_party_fetch`` on a MusicBrainz recording URL and expect the
    fetch to finish after a single serializer init/save."""
    track = factories["music.Track"]()
    url = f"https://musicbrainz.org/recording/{track.mbid}"
    fetch = factories["federation.Fetch"](url=url)
    # Recording payload using the hyphenated key names of the MusicBrainz API.
    payload = {
        "releases": [
            {
                "status": "Promotion",
                "id": "220ffb88-49ed-4df4-a330-46f8e7353ff0",
                "country": "DE",
                "title": "With Oi! To Hope for Myanmar 2022",
                "quality": "normal",
                "release-events": [
                    {
                        "area": {
                            "name": "Germany",
                            "id": "85752fda-13c4-31a3-bee5-0e5cb1f51dad",
                            "sort-name": "Germany",
                            "disambiguation": "",
                            "iso-3166-1-codes": ["DE"],
                        },
                        "date": "2022",
                    }
                ],
                "disambiguation": "Version aus 2022",
                "status-id": "518ffc83-5cde-34df-8627-81bff5093d92",
                "packaging-id": "8f931351-d2e2-310f-afc6-37b89ddba246",
                "artist-credit": [
                    {
                        "artist": {
                            "sort-name": "Various Artists",
                            "name": "Various Artists",
                            "disambiguation": "add compilations to this artist",
                            "type": "Other",
                            "type-id": "ac897045-5043-3294-969b-187360e45d86",
                            "id": "89ad4ac3-39f7-470e-963a-56509c546377",
                        },
                        "joinphrase": "",
                        "name": "Various Artists",
                    }
                ],
                "barcode": "",
                "date": "2022",
                "packaging": "Digipak",
                "text-representation": {"language": "mul", "script": "Latn"},
            }
        ],
        "disambiguation": "",
        "video": False,
        "first-release-date": "2022",
        "artist-credit": [
            {
                "artist": {
                    "name": "The Rebel Riot",
                    "disambiguation": "",
                    "sort-name": "Rebel Riot, The",
                    "type": "Group",
                    "id": "1ff2cd0c-2ac1-4296-b650-77ef57bb0d01",
                    "country": "MM",
                    "type-id": "e431f5f6-b5d2-343d-8b36-72607fffb74b",
                },
                "name": "The Rebel Riot",
                "joinphrase": "",
            }
        ],
        "title": "A.C.A.B.",
        "id": "455cd030-7394-4244-9a53-3b96a666b1c6",
        "length": 193253,
    }
    # NOTE(review): the spies target ``serializers.ArtistSerializer``; confirm
    # this is the serializer the task actually instantiates for recordings.
    init = mocker.spy(serializers.ArtistSerializer, "__init__")
    save = mocker.spy(serializers.ArtistSerializer, "save")

    # NOTE(review): this mocks an HTTP GET on the web URL; verify that the
    # MusicBrainz client used by the task really goes through it.
    r_mock.get(url, json=payload)

    tasks.third_party_fetch(fetch_id=fetch.pk)

    fetch.refresh_from_db()

    assert fetch.status == "finished"
    assert init.call_count == 1
    # NOTE(review): the metadata handler rewrites "-" to "_" in keys before
    # the data reaches a serializer, so comparing against the hyphenated
    # payload may not hold — confirm.
    assert init.call_args[1]["data"] == payload
    assert save.call_count == 1
|
||||||
|
|
Loading…
Reference in New Issue