Add Musicbrainz genres to funkwhale tag table and allow Musicbrainz tag sync (#2143)

This commit is contained in:
petitminion 2024-06-05 19:17:33 +00:00
parent 13f6571ad0
commit 0705467bf9
15 changed files with 254 additions and 9 deletions

View File

@ -960,6 +960,11 @@ CELERY_BEAT_SCHEDULE = {
"schedule": crontab(day_of_week="*", minute="0", hour="3"),
"options": {"expires": 60 * 60 * 24},
},
"tags.update_musicbrainz_genre": {
"task": "tags.update_musicbrainz_genre",
"schedule": crontab(day_of_month="2", minute="30", hour="3"),
"options": {"expires": 60 * 60 * 24},
},
}
if env.str("TYPESENSE_API_KEY", default=None):

View File

@ -1,6 +1,7 @@
import django_filters
from django import forms
from django.db.models import Q
from django.db.models.functions import Collate
from django_filters import rest_framework as filters
from funkwhale_api.audio import models as audio_models
@ -370,6 +371,13 @@ class ManageTagFilterSet(filters.FilterSet):
model = tags_models.Tag
fields = []
def get_queryset(self, request):
    """
    Return the parent queryset with an extra ``tag_deterministic`` column.

    ``tag_deterministic`` is the tag ``name`` re-collated with the
    ``und-x-icu`` collation.
    """
    base_queryset = super().get_queryset(request)
    deterministic_name = Collate("name", "und-x-icu")
    return base_queryset.annotate(tag_deterministic=deterministic_name)
class ManageReportFilterSet(filters.FilterSet):
q = fields.SmartSearchFilter(

View File

@ -1,6 +1,6 @@
from django.db import transaction
from django.db.models import Count, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models.functions import Coalesce, Length
from django.db.models.functions import Coalesce, Collate, Length
from django.shortcuts import get_object_or_404
from drf_spectacular.utils import extend_schema
from rest_framework import decorators as rest_decorators
@ -579,6 +579,7 @@ class ManageTagViewSet(
.order_by("-creation_date")
.annotate(items_count=Count("tagged_items"))
.annotate(length=Length("name"))
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
)
serializer_class = serializers.ManageTagSerializer
filterset_class = filters.ManageTagFilterSet

View File

@ -47,3 +47,32 @@ class MbidTaggedContent(types.BooleanPreference):
"or enable quality filtering to hide untagged content from API calls. "
)
default = False
@global_preferences_registry.register
class MbGenreTags(types.BooleanPreference):
    """Instance preference toggling the MusicBrainz genre import into tags."""

    # Identification of the preference: ``music__musicbrainz_genre_update``.
    section = music
    name = "musicbrainz_genre_update"

    # Enabled unless an administrator turns it off.
    default = True
    show_in_api = True

    # Labels shown to administrators.
    verbose_name = "Prepopulate tags with MusicBrainz Genre "
    help_text = (
        "Will trigger a monthly update of the tag table using Musicbrainz genres. "
        "Non-existing tag will be created and MusicBrainz Ids will be added "
        "to the tags if they match the genre name."
    )
@global_preferences_registry.register
class MbSyncTags(types.BooleanPreference):
    """
    Instance preference enabling tag sync from MusicBrainz on import.

    Exposed as ``music__sync_musicbrainz_tags``; disabled by default.
    """

    show_in_api = True
    section = music
    name = "sync_musicbrainz_tags"
    # Fixed the duplicated word ("to to") in the label shown to administrators.
    verbose_name = "Sync MusicBrainz tags to funkwhale objects"
    help_text = (
        "If uploaded files are tagged with a MusicBrainz ID, "
        "Funkwhale will query MusicBrainz server to add tags to "
        "the track, artist and album objects."
    )
    default = False

View File

@ -506,6 +506,7 @@ def truncate(v, length):
def _get_track(data, attributed_to=None, **forced_values):
sync_mb_tag = preferences.get("music__sync_musicbrainz_tags")
track_uuid = getter(data, "funkwhale", "track", "uuid")
if track_uuid:
@ -642,6 +643,10 @@ def _get_track(data, attributed_to=None, **forced_values):
common_utils.attach_file(
album, "attachment_cover", album_data.get("cover_data")
)
if sync_mb_tag and album_mbid:
tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(album)
else:
album = None
# get / create track
@ -724,10 +729,14 @@ def _get_track(data, attributed_to=None, **forced_values):
common_utils.attach_content(track, "description", description)
common_utils.attach_file(track, "attachment_cover", cover_data)
if sync_mb_tag and track_mbid:
tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(track)
return track
def get_artist(artist_data, attributed_to, from_activity_id):
sync_mb_tag = preferences.get("music__sync_musicbrainz_tags")
artist_mbid = artist_data.get("mbid", None)
artist_fid = artist_data.get("fid", None)
artist_name = artist_data["name"]
@ -759,6 +768,9 @@ def get_artist(artist_data, attributed_to, from_activity_id):
common_utils.attach_file(
artist, "attachment_cover", artist_data.get("cover_data")
)
if sync_mb_tag and artist_mbid:
tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(artist)
return artist

View File

@ -9,6 +9,7 @@ from django.conf import settings
from django.core.cache import cache
from django.db import transaction
from django.db.models import Count, F, Prefetch, Q, Sum
from django.db.models.functions import Collate
from django.utils import timezone
from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
from rest_framework import mixins, renderers
@ -883,9 +884,13 @@ class Search(views.APIView):
return common_utils.order_for_search(qs, "name")[: self.max_results]
# Search tags matching ``query``. The result is ordered through
# common_utils.order_for_search on "name" and truncated to self.max_results.
def get_tags(self, query):
# NOTE(review): search_fields is assigned twice in a row; only the second
# value ("tag_deterministic") is in effect when get_query() runs. The first
# assignment looks like the pre-change line of this diff hunk - confirm.
search_fields = ["name__unaccent"]
search_fields = ["tag_deterministic"]
query_obj = utils.get_query(query, search_fields)
# NOTE(review): qs is also assigned twice. The first queryset filters on
# query_obj without the tag_deterministic annotation that the second one
# adds; it presumably is the removed line of the hunk - confirm.
qs = Tag.objects.all().filter(query_obj)
qs = (
Tag.objects.all()
# Expose the tag name under the "und-x-icu" collation as tag_deterministic,
# which is the field the search query targets.
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
.filter(query_obj)
)
return common_utils.order_for_search(qs, "name")[: self.max_results]

View File

@ -8,7 +8,7 @@ from . import models
class TagFilter(filters.FilterSet):
q = fields.SearchFilter(search_fields=["name"])
q = fields.SearchFilter(search_fields=["tag_deterministic"])
ordering = django_filters.OrderingFilter(
fields=(
("name", "name"),

View File

@ -0,0 +1,32 @@
# Generated by Django 4.2.9 on 2024-06-02 13:14
from django.db import migrations, models
from django.contrib.postgres.operations import CreateCollation
from django.db import migrations
# Schema migration for the tags app: registers a database collation, adds a
# MusicBrainz id column to tags and re-declares the tag name column so that
# it uses the new collation.
class Migration(migrations.Migration):
# Must run after the previous tags migration.
dependencies = [
("tags", "0002_auto_20200803_1222"),
]
operations = [
# Create the "case_insensitive" collation first: the AlterField below
# refers to it by name through db_collation.
CreateCollation(
"case_insensitive",
provider="icu",
locale="und-u-ks-level2",
deterministic=False,
),
# Optional MusicBrainz identifier: nullable, indexed and unique.
migrations.AddField(
model_name="tag",
name="mbid",
field=models.UUIDField(blank=True, db_index=True, null=True, unique=True),
),
# Tag names become a plain CharField bound to the collation created above.
migrations.AlterField(
model_name="tag",
name="name",
field=models.CharField(
db_collation="case_insensitive", max_length=100, unique=True
),
),
]

View File

@ -3,7 +3,6 @@ import re
from django.conf import settings
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.contrib.postgres.fields import CICharField
from django.db import models, transaction
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
@ -12,7 +11,10 @@ TAG_REGEX = re.compile(r"^((\w+)([\d_]*))$")
class Tag(models.Model):
name = CICharField(max_length=100, unique=True)
name = models.CharField(
max_length=100, unique=True, db_collation="case_insensitive"
)
mbid = models.UUIDField(null=True, db_index=True, blank=True, unique=True)
creation_date = models.DateTimeField(default=timezone.now)
def __str__(self):

View File

@ -1,9 +1,17 @@
import collections
import logging
import time
import requests
from django.contrib.contenttypes.models import ContentType
from funkwhale_api import musicbrainz
from funkwhale_api.taskapp import celery
from . import models
logger = logging.getLogger(__name__)
def get_tags_from_foreign_key(
ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items"
@ -50,3 +58,85 @@ def add_tags_batch(data, model, tagged_items_attr="tagged_items"):
]
return models.TaggedItem.objects.bulk_create(tagged_items, batch_size=2000)
BASE_URL = "https://musicbrainz.org/ws/2/genre/all"
HEADERS = {"Accept": "application/json"}
def fetch_musicbrainz_genre():
    """
    Download the list of genres exposed by the MusicBrainz web service.

    The endpoint is queried page by page (``limit``/``offset``) until the
    ``genre-count`` announced by the server has been covered. When the server
    answers with its rate-limit message, the same page is requested once more
    after a 10 second pause.

    Returns:
        list: the entries found under the ``"genres"`` key of every page that
        was fetched. The list is partial (possibly empty) when a page could
        not be fetched, in which case the failure is logged.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    rate_limit_message = "Your requests are exceeding the allowable rate limit"
    # Seconds. Without a timeout a stalled connection blocks the worker forever.
    timeout = 30
    genres = []
    limit = 100  # Maximum limit per request
    offset = 0

    while True:
        params = {"limit": limit, "offset": offset}
        response = requests.get(
            BASE_URL, headers=HEADERS, params=params, timeout=timeout
        )

        # Look for the message in the decoded body. The previous check tested
        # a str for membership in a set holding the raw bytes, which can never
        # match, so the retry below was unreachable.
        if rate_limit_message in response.text:
            time.sleep(10)
            response = requests.get(
                BASE_URL, headers=HEADERS, params=params, timeout=timeout
            )

        if response.status_code != 200:
            logger.info("Failed to fetch mb genre: %s", response.content)
            break

        data = response.json()
        genres.extend(data["genres"])

        # Check if we have fetched all genres
        if offset + limit >= data["genre-count"]:
            break

        offset += limit
        # mb only allow one request per second
        time.sleep(1)

    return genres
@celery.app.task(name="tags.update_musicbrainz_genre")
def update_musicbrainz_genre():
    """
    Import MusicBrainz genres into the tag table.

    Genres whose MusicBrainz id is already attached to a tag are left alone.
    For the others, the tag with the same name receives the id, or a new tag
    is created when no tag has that name.
    """
    # NOTE(review): this task does not read the
    # ``music__musicbrainz_genre_update`` preference; confirm whether the
    # toggle is enforced somewhere else.

    # The database returns ``uuid.UUID`` objects while MusicBrainz sends ids
    # as strings; comparing both directly never matches. Normalise to str and
    # use a set so each lookup is O(1).
    known_mbids = {
        str(mbid)
        for mbid in models.Tag.objects.all().values_list("mbid", flat=True)
        if mbid is not None
    }

    for genre in fetch_musicbrainz_genre():
        if genre["id"] in known_mbids:
            continue
        models.Tag.objects.update_or_create(
            name=genre["name"],
            defaults={"name": genre["name"], "mbid": genre["id"]},
        )
def sync_fw_item_tag_with_musicbrainz_tags(obj):
    """
    Tag a Funkwhale object with the tags MusicBrainz has for its ``mbid``.

    Args:
        obj: a ``Track``, ``Album`` or ``Artist`` instance (matched on the
            class name) exposing an ``mbid`` attribute. For an album, the tags
            of its release group are added to those of the release itself.

    Raises:
        TypeError: if ``obj`` is not one of the supported classes.
    """
    obj_type = obj.__class__.__name__

    if obj_type == "Track":
        response = musicbrainz.api.recordings.get(id=obj.mbid, includes=["tags"])
        mb_tags = response["recording"].get("tag-list", [])
    elif obj_type == "Album":
        response = musicbrainz.api.releases.get(
            id=obj.mbid, includes=["tags", "release-groups"]
        )
        release = response["release"]
        # Copy so the release-group tags do not end up in the response itself.
        mb_tags = list(release.get("tag-list", []))
        if group_mbid := release.get("release-group", {}).get("id", False):
            group_response = musicbrainz.api.release_groups.get(
                id=group_mbid, includes=["tags"]
            )
            mb_tags.extend(group_response["release-group"].get("tag-list", []))
    elif obj_type == "Artist":
        response = musicbrainz.api.artists.get(id=obj.mbid, includes=["tags"])
        mb_tags = response["artist"].get("tag-list", [])
    else:
        # Fail with an explicit message instead of an UnboundLocalError.
        raise TypeError(
            f"Cannot sync MusicBrainz tags for object of type {obj_type!r}"
        )

    # ``.get("tag-list", [])`` above tolerates entities returned without any
    # tag; dict.fromkeys drops repeated names (a release and its release group
    # may share tags) while keeping their order.
    tags = list(dict.fromkeys(t["name"] for t in mb_tags))
    models.add_tags(obj, *tags)

View File

@ -1,5 +1,6 @@
import django_filters.rest_framework
from django.db.models import functions
from django.db.models.functions import Collate
from rest_framework import viewsets
from funkwhale_api.users.oauth import permissions as oauth_permissions
@ -12,6 +13,7 @@ class TagViewSet(viewsets.ReadOnlyModelViewSet):
queryset = (
models.Tag.objects.all()
.annotate(__size=functions.Length("name"))
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
.order_by("name")
)
serializer_class = serializers.TagSerializer

View File

@ -1,3 +1,5 @@
from django.db.models.functions import Collate
from funkwhale_api.tags import filters, models
@ -9,7 +11,11 @@ def test_filter_search_tag(factories, queryset_equal_list):
]
factories["tags.Tag"](name="TestTag")
factories["tags.Tag"](name="TestTag2")
qs = models.Tag.objects.all().order_by("name")
qs = (
models.Tag.objects.all()
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
.order_by("name")
)
filterset = filters.TagFilter({"q": "tag1"}, queryset=qs)
assert filterset.qs == matches

View File

@ -1,5 +1,5 @@
from funkwhale_api.music import models as music_models
from funkwhale_api.tags import tasks
from funkwhale_api.tags import models, tasks
def test_get_tags_from_foreign_key(factories):
@ -34,3 +34,53 @@ def test_add_tags_batch(factories):
)
assert artist.get_tags() == ["Rap", "Rock"]
def test_update_musicbrainz_genre(factories, mocker):
    """
    The genre task creates missing tags and attaches MusicBrainz ids to
    existing tags that share a genre name.
    """
    dnb_mbid = "aaaac282-9075-4736-b1f9-7012404daaaa"
    known_mbid = "2628c282-9075-4736-b1f9-7012404d09e8"
    new_mbid = "2628c282-9075-4736-b1f9-7012404daaaa"

    tag_with_mbid = factories["tags.Tag"](mbid=known_mbid)
    tag_without_mbid = factories["tags.Tag"](mbid=None)
    # Two extra tags that are absent from the mocked MusicBrainz answer.
    for _ in range(2):
        factories["tags.Tag"]()

    remote_genres = [
        {"name": "dnb", "id": dnb_mbid},
        {"name": tag_with_mbid.name, "id": known_mbid},
        {"name": tag_without_mbid.name, "id": new_mbid},
    ]
    mocker.patch(
        "funkwhale_api.tags.tasks.fetch_musicbrainz_genre",
        return_value=remote_genres,
    )

    tasks.update_musicbrainz_genre()

    expectations = (
        ("dnb", dnb_mbid),
        (tag_without_mbid.name, new_mbid),
        (tag_with_mbid.name, known_mbid),
    )
    for tag_name, expected_mbid in expectations:
        assert str(models.Tag.objects.get(name=tag_name).mbid) == expected_mbid
def test_sync_musicbrainz_tags(factories, mocker):
    """
    Artists, tracks and albums all receive the tag returned by the mocked
    MusicBrainz client.
    """
    api_attr_by_model = {
        "Artist": "artists",
        "Track": "recordings",
        "Album": "releases",
    }
    fixtures = (
        ("music.Artist", "2628c282-9075-4736-b1f9-7012404daaaa"),
        ("music.Track", "2628c282-9075-4736-b1f9-7012404daaaa"),
        ("music.Album", "2628c282-9075-4736-b1f9-7012404dacab"),
    )
    objs = [factories[factory_name](mbid=mbid) for factory_name, mbid in fixtures]

    for obj in objs:
        api_attr = api_attr_by_model[obj.__class__.__name__]
        # The response is keyed by the singular entity name, i.e. the client
        # attribute without its trailing "s".
        entity_key = api_attr[:-1]
        fake_response = {entity_key: {"tag-list": [{"name": "Amazing Tag"}]}}
        mocker.patch(
            f"funkwhale_api.tags.tasks.musicbrainz.api.{api_attr}.get",
            return_value=fake_response,
        )

        tasks.sync_fw_item_tag_with_musicbrainz_tags(obj)
        obj.refresh_from_db()

        first_tagged_item = obj.tagged_items.all()[0]
        assert first_tagged_item.tag.name == "Amazing Tag"

View File

@ -0,0 +1 @@
Add Musicbrainz genres to funkwhale tag table and allow Musicbrainz tag sync (#2143)

View File

@ -58,7 +58,9 @@ const groups = computed(() => [
settings: [
{ name: 'music__transcoding_enabled' },
{ name: 'music__transcoding_cache_duration' },
{ name: 'music__only_allow_musicbrainz_tagged_files' }
{ name: 'music__only_allow_musicbrainz_tagged_files' },
{ name: 'music__sync_musicbrainz_tags' },
{ name: 'music__musicbrainz_genre_update' }
]
},