From 0705467bf9290e1d0d110611841e211a9eb61964 Mon Sep 17 00:00:00 2001 From: petitminion Date: Wed, 5 Jun 2024 19:17:33 +0000 Subject: [PATCH] Add Musicbrainz genres to funkwhale tag table and allow Musicbrainz tag sync (#2143) --- api/config/settings/common.py | 5 ++ api/funkwhale_api/manage/filters.py | 8 ++ api/funkwhale_api/manage/views.py | 3 +- .../music/dynamic_preferences_registry.py | 29 ++++++ api/funkwhale_api/music/tasks.py | 12 +++ api/funkwhale_api/music/views.py | 9 +- api/funkwhale_api/tags/filters.py | 2 +- .../0003_tag_mbid_alter_tag_name.py | 32 +++++++ api/funkwhale_api/tags/models.py | 6 +- api/funkwhale_api/tags/tasks.py | 90 +++++++++++++++++++ api/funkwhale_api/tags/views.py | 2 + api/tests/tags/test_filters.py | 8 +- api/tests/tags/test_tasks.py | 52 ++++++++++- changes/changelog.d/2143.feature | 1 + front/src/views/admin/Settings.vue | 4 +- 15 files changed, 254 insertions(+), 9 deletions(-) create mode 100644 api/funkwhale_api/tags/migrations/0003_tag_mbid_alter_tag_name.py create mode 100644 changes/changelog.d/2143.feature diff --git a/api/config/settings/common.py b/api/config/settings/common.py index 4cbddfad1..b2fa0d715 100644 --- a/api/config/settings/common.py +++ b/api/config/settings/common.py @@ -960,6 +960,11 @@ CELERY_BEAT_SCHEDULE = { "schedule": crontab(day_of_week="*", minute="0", hour="3"), "options": {"expires": 60 * 60 * 24}, }, + "tags.update_musicbrainz_genre": { + "task": "tags.update_musicbrainz_genre", + "schedule": crontab(day_of_month="2", minute="30", hour="3"), + "options": {"expires": 60 * 60 * 24}, + }, } if env.str("TYPESENSE_API_KEY", default=None): diff --git a/api/funkwhale_api/manage/filters.py b/api/funkwhale_api/manage/filters.py index fb283de99..b3884a058 100644 --- a/api/funkwhale_api/manage/filters.py +++ b/api/funkwhale_api/manage/filters.py @@ -1,6 +1,7 @@ import django_filters from django import forms from django.db.models import Q +from django.db.models.functions import Collate from django_filters import rest_framework as filters from funkwhale_api.audio import models as audio_models @@ -370,6 +371,13 @@ class ManageTagFilterSet(filters.FilterSet): model = tags_models.Tag fields = [] + def get_queryset(self, request): + return ( + super() + .get_queryset(request) + .annotate(tag_deterministic=Collate("name", "und-x-icu")) + ) + class ManageReportFilterSet(filters.FilterSet): q = fields.SmartSearchFilter( diff --git a/api/funkwhale_api/manage/views.py b/api/funkwhale_api/manage/views.py index 4de9b5e96..0acad7246 100644 --- a/api/funkwhale_api/manage/views.py +++ b/api/funkwhale_api/manage/views.py @@ -1,6 +1,6 @@ from django.db import transaction from django.db.models import Count, OuterRef, Prefetch, Q, Subquery, Sum -from django.db.models.functions import Coalesce, Length +from django.db.models.functions import Coalesce, Collate, Length from django.shortcuts import get_object_or_404 from drf_spectacular.utils import extend_schema from rest_framework import decorators as rest_decorators @@ -579,6 +579,7 @@ class ManageTagViewSet( .order_by("-creation_date") .annotate(items_count=Count("tagged_items")) .annotate(length=Length("name")) + .annotate(tag_deterministic=Collate("name", "und-x-icu")) ) serializer_class = serializers.ManageTagSerializer filterset_class = filters.ManageTagFilterSet diff --git a/api/funkwhale_api/music/dynamic_preferences_registry.py b/api/funkwhale_api/music/dynamic_preferences_registry.py index 20890fde8..60d5d52f1 100644 --- a/api/funkwhale_api/music/dynamic_preferences_registry.py +++ b/api/funkwhale_api/music/dynamic_preferences_registry.py @@ -47,3 +47,32 @@ class MbidTaggedContent(types.BooleanPreference): "or enable quality filtering to hide untagged content from API calls. " ) default = False + + +@global_preferences_registry.register +class MbGenreTags(types.BooleanPreference): + show_in_api = True + section = music + name = "musicbrainz_genre_update" + verbose_name = "Prepopulate tags with MusicBrainz Genre " + help_text = ( + "Will trigger a monthly update of the tag table " + "using Musicbrainz genres. Non-existing tag will be created and " + "MusicBrainz Ids will be added to the tags if " + "they match the genre name." + ) + default = True + + +@global_preferences_registry.register +class MbSyncTags(types.BooleanPreference): + show_in_api = True + section = music + name = "sync_musicbrainz_tags" + verbose_name = "Sync MusicBrainz to to funkwhale objects" + help_text = ( + "If uploaded files are tagged with a MusicBrainz ID, " + "Funkwhale will query MusicBrainz server to add tags to " + "the track, artist and album objects." + ) + default = False diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index 54801967c..6a6ca252b 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -506,6 +506,7 @@ def truncate(v, length): def _get_track(data, attributed_to=None, **forced_values): + sync_mb_tag = preferences.get("music__sync_musicbrainz_tags") track_uuid = getter(data, "funkwhale", "track", "uuid") if track_uuid: @@ -642,6 +643,10 @@ def _get_track(data, attributed_to=None, **forced_values): common_utils.attach_file( album, "attachment_cover", album_data.get("cover_data") ) + + if sync_mb_tag and album_mbid: + tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(album) + else: album = None # get / create track @@ -724,10 +729,14 @@ def _get_track(data, attributed_to=None, **forced_values): common_utils.attach_content(track, "description", description) common_utils.attach_file(track, "attachment_cover", cover_data) + if sync_mb_tag and track_mbid: + tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(track) + return track def get_artist(artist_data, attributed_to, from_activity_id): + sync_mb_tag = preferences.get("music__sync_musicbrainz_tags") artist_mbid = artist_data.get("mbid", None) artist_fid = artist_data.get("fid", None) artist_name = artist_data["name"] @@ -759,6 +768,9 @@ def get_artist(artist_data, attributed_to, from_activity_id): common_utils.attach_file( artist, "attachment_cover", artist_data.get("cover_data") ) + if sync_mb_tag and artist_mbid: + tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(artist) + return artist diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py index 282ceca24..d060c6376 100644 --- a/api/funkwhale_api/music/views.py +++ b/api/funkwhale_api/music/views.py @@ -9,6 +9,7 @@ from django.conf import settings from django.core.cache import cache from django.db import transaction from django.db.models import Count, F, Prefetch, Q, Sum +from django.db.models.functions import Collate from django.utils import timezone from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view from rest_framework import mixins, renderers @@ -883,9 +884,13 @@ class Search(views.APIView): return common_utils.order_for_search(qs, "name")[: self.max_results] def get_tags(self, query): - search_fields = ["name__unaccent"] + search_fields = ["tag_deterministic"] query_obj = utils.get_query(query, search_fields) - qs = Tag.objects.all().filter(query_obj) + qs = ( + Tag.objects.all() + .annotate(tag_deterministic=Collate("name", "und-x-icu")) + .filter(query_obj) + ) return common_utils.order_for_search(qs, "name")[: self.max_results] diff --git a/api/funkwhale_api/tags/filters.py b/api/funkwhale_api/tags/filters.py index bcd6894af..4ef865927 100644 --- a/api/funkwhale_api/tags/filters.py +++ b/api/funkwhale_api/tags/filters.py @@ -8,7 +8,7 @@ from . import models class TagFilter(filters.FilterSet): - q = fields.SearchFilter(search_fields=["name"]) + q = fields.SearchFilter(search_fields=["tag_deterministic"]) ordering = django_filters.OrderingFilter( fields=( ("name", "name"), diff --git a/api/funkwhale_api/tags/migrations/0003_tag_mbid_alter_tag_name.py b/api/funkwhale_api/tags/migrations/0003_tag_mbid_alter_tag_name.py new file mode 100644 index 000000000..f43b2f36a --- /dev/null +++ b/api/funkwhale_api/tags/migrations/0003_tag_mbid_alter_tag_name.py @@ -0,0 +1,32 @@ +# Generated by Django 4.2.9 on 2024-06-02 13:14 + +from django.db import migrations, models +from django.contrib.postgres.operations import CreateCollation +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("tags", "0002_auto_20200803_1222"), + ] + + operations = [ + CreateCollation( + "case_insensitive", + provider="icu", + locale="und-u-ks-level2", + deterministic=False, + ), + migrations.AddField( + model_name="tag", + name="mbid", + field=models.UUIDField(blank=True, db_index=True, null=True, unique=True), + ), + migrations.AlterField( + model_name="tag", + name="name", + field=models.CharField( + db_collation="case_insensitive", max_length=100, unique=True + ), + ), + ] diff --git a/api/funkwhale_api/tags/models.py b/api/funkwhale_api/tags/models.py index 55c56ab08..bea45c9a8 100644 --- a/api/funkwhale_api/tags/models.py +++ b/api/funkwhale_api/tags/models.py @@ -3,7 +3,6 @@ import re from django.conf import settings from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType -from django.contrib.postgres.fields import CICharField from django.db import models, transaction from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -12,7 +11,10 @@ TAG_REGEX = re.compile(r"^((\w+)([\d_]*))$") class Tag(models.Model): - name = CICharField(max_length=100, unique=True) + name = models.CharField( + max_length=100, unique=True, db_collation="case_insensitive" + ) + mbid = models.UUIDField(null=True, db_index=True, blank=True, unique=True) creation_date = models.DateTimeField(default=timezone.now) def __str__(self): diff --git a/api/funkwhale_api/tags/tasks.py b/api/funkwhale_api/tags/tasks.py index 4bc6d59ba..8ec45d3b4 100644 --- a/api/funkwhale_api/tags/tasks.py +++ b/api/funkwhale_api/tags/tasks.py @@ -1,9 +1,17 @@ import collections +import logging +import time +import requests from django.contrib.contenttypes.models import ContentType +from funkwhale_api import musicbrainz +from funkwhale_api.taskapp import celery + from . import models +logger = logging.getLogger(__name__) + def get_tags_from_foreign_key( ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items" @@ -50,3 +58,85 @@ def add_tags_batch(data, model, tagged_items_attr="tagged_items"): ] return models.TaggedItem.objects.bulk_create(tagged_items, batch_size=2000) + + +BASE_URL = "https://musicbrainz.org/ws/2/genre/all" +HEADERS = {"Accept": "application/json"} + + +def fetch_musicbrainz_genre(): + genres = [] + limit = 100 # Maximum limit per request + offset = 0 + + while True: + response = requests.get( + BASE_URL, headers=HEADERS, params={"limit": limit, "offset": offset} + ) + + if "Your requests are exceeding the allowable rate limit" in { + response._content + }: + time.sleep(10) + response = requests.get( + BASE_URL, headers=HEADERS, params={"limit": limit, "offset": offset} + ) + if response.status_code != 200: + logger.info(f"Failed to fetch mb genre: {response._content}") + break + elif response.status_code != 200: + logger.info(f"Failed to fetch mb genre: {response._content}") + break + + data = response.json() + genres.extend(data["genres"]) + + # Check if we have fetched all genres + if offset + limit >= data["genre-count"]: + break + + offset += limit + # mb only allow one request per second + time.sleep(1) + + return genres + + +@celery.app.task(name="tags.update_musicbrainz_genre") +def update_musicbrainz_genre(): + tags_mbid = models.Tag.objects.all().values_list("mbid", flat=True) + genres = fetch_musicbrainz_genre() + for genre in genres: + if genre["id"] in tags_mbid: + continue + + create_defaults = {"name": genre["name"], "mbid": genre["id"]} + models.Tag.objects.update_or_create( + name=genre["name"], + defaults=create_defaults, + ) + + +def sync_fw_item_tag_with_musicbrainz_tags(obj): + if obj.__class__.__name__ == "Track": + response = musicbrainz.api.recordings.get(id=obj.mbid, includes=["tags"]) + mb_obj_type = "recording" + elif obj.__class__.__name__ == "Album": + response = musicbrainz.api.releases.get( + id=obj.mbid, includes=["tags", "release-groups"] + ) + mb_obj_type = "release" + if mbid := response["release"].get("release-group", {}).get("id", False): + response["release"]["tag-list"].extend( + musicbrainz.api.release_groups.get(id=mbid, includes=["tags"])[ + "release-group" + ]["tag-list"] + ) + + elif obj.__class__.__name__ == "Artist": + response = musicbrainz.api.artists.get(id=obj.mbid, includes=["tags"]) + mb_obj_type = "artist" + + tags = [t["name"] for t in response[mb_obj_type]["tag-list"]] + + models.add_tags(obj, *tags) diff --git a/api/funkwhale_api/tags/views.py b/api/funkwhale_api/tags/views.py index 91c94840c..30670f227 100644 --- a/api/funkwhale_api/tags/views.py +++ b/api/funkwhale_api/tags/views.py @@ -1,5 +1,6 @@ import django_filters.rest_framework from django.db.models import functions +from django.db.models.functions import Collate from rest_framework import viewsets from funkwhale_api.users.oauth import permissions as oauth_permissions @@ -12,6 +13,7 @@ class TagViewSet(viewsets.ReadOnlyModelViewSet): queryset = ( models.Tag.objects.all() .annotate(__size=functions.Length("name")) + .annotate(tag_deterministic=Collate("name", "und-x-icu")) .order_by("name") ) serializer_class = serializers.TagSerializer diff --git a/api/tests/tags/test_filters.py b/api/tests/tags/test_filters.py index 970350f56..a24841400 100644 --- a/api/tests/tags/test_filters.py +++ b/api/tests/tags/test_filters.py @@ -1,3 +1,5 @@ +from django.db.models.functions import Collate + from funkwhale_api.tags import filters, models @@ -9,7 +11,11 @@ def test_filter_search_tag(factories, queryset_equal_list): ] factories["tags.Tag"](name="TestTag") factories["tags.Tag"](name="TestTag2") - qs = models.Tag.objects.all().order_by("name") + qs = ( + models.Tag.objects.all() + .annotate(tag_deterministic=Collate("name", "und-x-icu")) + .order_by("name") + ) filterset = filters.TagFilter({"q": "tag1"}, queryset=qs) assert filterset.qs == matches diff --git a/api/tests/tags/test_tasks.py b/api/tests/tags/test_tasks.py index 2dab64544..6264b49ce 100644 --- a/api/tests/tags/test_tasks.py +++ b/api/tests/tags/test_tasks.py @@ -1,5 +1,5 @@ from funkwhale_api.music import models as music_models -from funkwhale_api.tags import tasks +from funkwhale_api.tags import models, tasks def test_get_tags_from_foreign_key(factories): @@ -34,3 +34,53 @@ def test_add_tags_batch(factories): ) assert artist.get_tags() == ["Rap", "Rock"] + + +def test_update_musicbrainz_genre(factories, mocker): + tag1 = factories["tags.Tag"](mbid="2628c282-9075-4736-b1f9-7012404d09e8") + tag2 = factories["tags.Tag"](mbid=None) + factories["tags.Tag"]() + factories["tags.Tag"]() + mb_genre = [ + {"name": "dnb", "id": "aaaac282-9075-4736-b1f9-7012404daaaa"}, + {"name": tag1.name, "id": "2628c282-9075-4736-b1f9-7012404d09e8"}, + {"name": tag2.name, "id": "2628c282-9075-4736-b1f9-7012404daaaa"}, + ] + mocker.patch( + "funkwhale_api.tags.tasks.fetch_musicbrainz_genre", return_value=mb_genre + ) + tasks.update_musicbrainz_genre() + + assert ( + str(models.Tag.objects.get(name="dnb").mbid) + == "aaaac282-9075-4736-b1f9-7012404daaaa" + ) + assert ( + str(models.Tag.objects.get(name=tag2.name).mbid) + == "2628c282-9075-4736-b1f9-7012404daaaa" + ) + assert ( + str(models.Tag.objects.get(name=tag1.name).mbid) + == "2628c282-9075-4736-b1f9-7012404d09e8" + ) + + +def test_sync_musicbrainz_tags(factories, mocker): + objs = [ + factories["music.Artist"](mbid="2628c282-9075-4736-b1f9-7012404daaaa"), + factories["music.Track"](mbid="2628c282-9075-4736-b1f9-7012404daaaa"), + factories["music.Album"](mbid="2628c282-9075-4736-b1f9-7012404dacab"), + ] + obj_map = {"Artist": "artists", "Track": "recordings", "Album": "releases"} + for obj in objs: + obj_type = obj.__class__.__name__ + mocker.patch( + f"funkwhale_api.tags.tasks.musicbrainz.api.{obj_map[obj_type]}.get", + return_value={ + obj_map[obj_type][:-1]: {"tag-list": [{"name": "Amazing Tag"}]} + }, + ) + + tasks.sync_fw_item_tag_with_musicbrainz_tags(obj) + obj.refresh_from_db() + assert obj.tagged_items.all()[0].tag.name == "Amazing Tag" diff --git a/changes/changelog.d/2143.feature b/changes/changelog.d/2143.feature new file mode 100644 index 000000000..7b9a33bc2 --- /dev/null +++ b/changes/changelog.d/2143.feature @@ -0,0 +1 @@ +Add Musicbrainz genres to funkwhale tag table and allow Musicbrainz tag sync (#2143) diff --git a/front/src/views/admin/Settings.vue b/front/src/views/admin/Settings.vue index d85b0a976..5a92c3847 100644 --- a/front/src/views/admin/Settings.vue +++ b/front/src/views/admin/Settings.vue @@ -58,7 +58,9 @@ const groups = computed(() => [ settings: [ { name: 'music__transcoding_enabled' }, { name: 'music__transcoding_cache_duration' }, - { name: 'music__only_allow_musicbrainz_tagged_files' } + { name: 'music__only_allow_musicbrainz_tagged_files' }, + { name: 'music__sync_musicbrainz_tags' }, + { name: 'music__musicbrainz_genre_update' } ] },