Add Musicbrainz genres to funkwhale tag table and allow Musicbrainz tag sync (#2143)

This commit is contained in:
petitminion 2024-06-05 19:17:33 +00:00
parent 13f6571ad0
commit 0705467bf9
15 changed files with 254 additions and 9 deletions

View File

@ -960,6 +960,11 @@ CELERY_BEAT_SCHEDULE = {
"schedule": crontab(day_of_week="*", minute="0", hour="3"), "schedule": crontab(day_of_week="*", minute="0", hour="3"),
"options": {"expires": 60 * 60 * 24}, "options": {"expires": 60 * 60 * 24},
}, },
"tags.update_musicbrainz_genre": {
"task": "tags.update_musicbrainz_genre",
"schedule": crontab(day_of_month="2", minute="30", hour="3"),
"options": {"expires": 60 * 60 * 24},
},
} }
if env.str("TYPESENSE_API_KEY", default=None): if env.str("TYPESENSE_API_KEY", default=None):

View File

@ -1,6 +1,7 @@
import django_filters import django_filters
from django import forms from django import forms
from django.db.models import Q from django.db.models import Q
from django.db.models.functions import Collate
from django_filters import rest_framework as filters from django_filters import rest_framework as filters
from funkwhale_api.audio import models as audio_models from funkwhale_api.audio import models as audio_models
@ -370,6 +371,13 @@ class ManageTagFilterSet(filters.FilterSet):
model = tags_models.Tag model = tags_models.Tag
fields = [] fields = []
def get_queryset(self, request):
    """Return the parent queryset with a deterministic-collation copy of ``name``.

    The extra ``tag_deterministic`` annotation re-collates ``name`` with the
    ``und-x-icu`` collation so it can be used as a search target.
    """
    # NOTE(review): django-filter's FilterSet does not appear to define a
    # ``get_queryset(request)`` hook -- confirm this method is ever invoked
    # (the viewset queryset carries the same annotation).
    base_queryset = super().get_queryset(request)
    deterministic_name = Collate("name", "und-x-icu")
    return base_queryset.annotate(tag_deterministic=deterministic_name)
class ManageReportFilterSet(filters.FilterSet): class ManageReportFilterSet(filters.FilterSet):
q = fields.SmartSearchFilter( q = fields.SmartSearchFilter(

View File

@ -1,6 +1,6 @@
from django.db import transaction from django.db import transaction
from django.db.models import Count, OuterRef, Prefetch, Q, Subquery, Sum from django.db.models import Count, OuterRef, Prefetch, Q, Subquery, Sum
from django.db.models.functions import Coalesce, Length from django.db.models.functions import Coalesce, Collate, Length
from django.shortcuts import get_object_or_404 from django.shortcuts import get_object_or_404
from drf_spectacular.utils import extend_schema from drf_spectacular.utils import extend_schema
from rest_framework import decorators as rest_decorators from rest_framework import decorators as rest_decorators
@ -579,6 +579,7 @@ class ManageTagViewSet(
.order_by("-creation_date") .order_by("-creation_date")
.annotate(items_count=Count("tagged_items")) .annotate(items_count=Count("tagged_items"))
.annotate(length=Length("name")) .annotate(length=Length("name"))
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
) )
serializer_class = serializers.ManageTagSerializer serializer_class = serializers.ManageTagSerializer
filterset_class = filters.ManageTagFilterSet filterset_class = filters.ManageTagFilterSet

View File

@ -47,3 +47,32 @@ class MbidTaggedContent(types.BooleanPreference):
"or enable quality filtering to hide untagged content from API calls. " "or enable quality filtering to hide untagged content from API calls. "
) )
default = False default = False
@global_preferences_registry.register
class MbGenreTags(types.BooleanPreference):
    """Boolean instance preference ``music__musicbrainz_genre_update``.

    Exposed through the API and enabled by default.
    """

    # NOTE(review): the ``tags.update_musicbrainz_genre`` task added in the same
    # change does not visibly read this preference -- confirm where it is enforced.
    show_in_api = True
    section = music
    name = "musicbrainz_genre_update"
    verbose_name = "Prepopulate tags with MusicBrainz genres"
    help_text = (
        "Will trigger a monthly update of the tag table "
        "using MusicBrainz genres. Non-existing tags will be created and "
        "MusicBrainz IDs will be added to the tags if "
        "they match the genre name."
    )
    default = True
@global_preferences_registry.register
class MbSyncTags(types.BooleanPreference):
    """Boolean instance preference ``music__sync_musicbrainz_tags``.

    Exposed through the API and disabled by default.
    """

    show_in_api = True
    section = music
    name = "sync_musicbrainz_tags"
    verbose_name = "Sync MusicBrainz tags to Funkwhale objects"
    help_text = (
        "If uploaded files are tagged with a MusicBrainz ID, "
        "Funkwhale will query the MusicBrainz server to add tags to "
        "the track, artist and album objects."
    )
    default = False

View File

@ -506,6 +506,7 @@ def truncate(v, length):
def _get_track(data, attributed_to=None, **forced_values): def _get_track(data, attributed_to=None, **forced_values):
sync_mb_tag = preferences.get("music__sync_musicbrainz_tags")
track_uuid = getter(data, "funkwhale", "track", "uuid") track_uuid = getter(data, "funkwhale", "track", "uuid")
if track_uuid: if track_uuid:
@ -642,6 +643,10 @@ def _get_track(data, attributed_to=None, **forced_values):
common_utils.attach_file( common_utils.attach_file(
album, "attachment_cover", album_data.get("cover_data") album, "attachment_cover", album_data.get("cover_data")
) )
if sync_mb_tag and album_mbid:
tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(album)
else: else:
album = None album = None
# get / create track # get / create track
@ -724,10 +729,14 @@ def _get_track(data, attributed_to=None, **forced_values):
common_utils.attach_content(track, "description", description) common_utils.attach_content(track, "description", description)
common_utils.attach_file(track, "attachment_cover", cover_data) common_utils.attach_file(track, "attachment_cover", cover_data)
if sync_mb_tag and track_mbid:
tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(track)
return track return track
def get_artist(artist_data, attributed_to, from_activity_id): def get_artist(artist_data, attributed_to, from_activity_id):
sync_mb_tag = preferences.get("music__sync_musicbrainz_tags")
artist_mbid = artist_data.get("mbid", None) artist_mbid = artist_data.get("mbid", None)
artist_fid = artist_data.get("fid", None) artist_fid = artist_data.get("fid", None)
artist_name = artist_data["name"] artist_name = artist_data["name"]
@ -759,6 +768,9 @@ def get_artist(artist_data, attributed_to, from_activity_id):
common_utils.attach_file( common_utils.attach_file(
artist, "attachment_cover", artist_data.get("cover_data") artist, "attachment_cover", artist_data.get("cover_data")
) )
if sync_mb_tag and artist_mbid:
tags_tasks.sync_fw_item_tag_with_musicbrainz_tags(artist)
return artist return artist

View File

@ -9,6 +9,7 @@ from django.conf import settings
from django.core.cache import cache from django.core.cache import cache
from django.db import transaction from django.db import transaction
from django.db.models import Count, F, Prefetch, Q, Sum from django.db.models import Count, F, Prefetch, Q, Sum
from django.db.models.functions import Collate
from django.utils import timezone from django.utils import timezone
from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
from rest_framework import mixins, renderers from rest_framework import mixins, renderers
@ -883,9 +884,13 @@ class Search(views.APIView):
return common_utils.order_for_search(qs, "name")[: self.max_results] return common_utils.order_for_search(qs, "name")[: self.max_results]
def get_tags(self, query): def get_tags(self, query):
search_fields = ["name__unaccent"] search_fields = ["tag_deterministic"]
query_obj = utils.get_query(query, search_fields) query_obj = utils.get_query(query, search_fields)
qs = Tag.objects.all().filter(query_obj) qs = (
Tag.objects.all()
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
.filter(query_obj)
)
return common_utils.order_for_search(qs, "name")[: self.max_results] return common_utils.order_for_search(qs, "name")[: self.max_results]

View File

@ -8,7 +8,7 @@ from . import models
class TagFilter(filters.FilterSet): class TagFilter(filters.FilterSet):
q = fields.SearchFilter(search_fields=["name"]) q = fields.SearchFilter(search_fields=["tag_deterministic"])
ordering = django_filters.OrderingFilter( ordering = django_filters.OrderingFilter(
fields=( fields=(
("name", "name"), ("name", "name"),

View File

@ -0,0 +1,32 @@
# Generated by Django 4.2.9 on 2024-06-02 13:14
from django.db import migrations, models
from django.contrib.postgres.operations import CreateCollation
from django.db import migrations
class Migration(migrations.Migration):
    # Schema migration for the tags app: registers a case-insensitive ICU
    # collation, adds a MusicBrainz id column to Tag and switches Tag.name to a
    # plain CharField that uses that collation.
    dependencies = [
        ("tags", "0002_auto_20200803_1222"),
    ]
    operations = [
        # The collation must exist before the AlterField below references it.
        CreateCollation(
            "case_insensitive",
            provider="icu",
            locale="und-u-ks-level2",
            deterministic=False,
        ),
        # Optional MusicBrainz identifier; nullable, but unique when set.
        migrations.AddField(
            model_name="tag",
            name="mbid",
            field=models.UUIDField(blank=True, db_index=True, null=True, unique=True),
        ),
        # Tag.name now relies on the non-deterministic collation created above.
        migrations.AlterField(
            model_name="tag",
            name="name",
            field=models.CharField(
                db_collation="case_insensitive", max_length=100, unique=True
            ),
        ),
    ]

View File

@ -3,7 +3,6 @@ import re
from django.conf import settings from django.conf import settings
from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.contrib.postgres.fields import CICharField
from django.db import models, transaction from django.db import models, transaction
from django.utils import timezone from django.utils import timezone
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
@ -12,7 +11,10 @@ TAG_REGEX = re.compile(r"^((\w+)([\d_]*))$")
class Tag(models.Model): class Tag(models.Model):
name = CICharField(max_length=100, unique=True) name = models.CharField(
max_length=100, unique=True, db_collation="case_insensitive"
)
mbid = models.UUIDField(null=True, db_index=True, blank=True, unique=True)
creation_date = models.DateTimeField(default=timezone.now) creation_date = models.DateTimeField(default=timezone.now)
def __str__(self): def __str__(self):

View File

@ -1,9 +1,17 @@
import collections import collections
import logging
import time
import requests
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from funkwhale_api import musicbrainz
from funkwhale_api.taskapp import celery
from . import models from . import models
logger = logging.getLogger(__name__)
def get_tags_from_foreign_key( def get_tags_from_foreign_key(
ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items" ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items"
@ -50,3 +58,85 @@ def add_tags_batch(data, model, tagged_items_attr="tagged_items"):
] ]
return models.TaggedItem.objects.bulk_create(tagged_items, batch_size=2000) return models.TaggedItem.objects.bulk_create(tagged_items, batch_size=2000)
BASE_URL = "https://musicbrainz.org/ws/2/genre/all"
HEADERS = {"Accept": "application/json"}


def _get_genre_page(limit, offset):
    """Request one page of the MusicBrainz genre listing and return the response."""
    return requests.get(
        BASE_URL,
        headers=HEADERS,
        params={"limit": limit, "offset": offset},
        # Without a timeout a stalled connection would block the worker forever.
        timeout=30,
    )


def fetch_musicbrainz_genre():
    """Download every genre exposed by the MusicBrainz ``genre/all`` endpoint.

    Pages are requested ``limit`` entries at a time until the reported
    ``genre-count`` is reached, sleeping one second between pages.

    Returns:
        list: the genre dicts collected from the ``genres`` key of each page.
        When a page cannot be fetched (non-200 status) the failure is logged
        and the genres collected so far are returned.
    """
    rate_limit_message = "Your requests are exceeding the allowable rate limit"
    genres = []
    limit = 100  # Maximum limit per request
    offset = 0

    while True:
        response = _get_genre_page(limit, offset)

        # Substring check on the decoded body: comparing a str against the raw
        # bytes payload can never match, which would make the retry unreachable.
        if rate_limit_message in response.text:
            # Back off once, then retry the same page.
            time.sleep(10)
            response = _get_genre_page(limit, offset)

        if response.status_code != 200:
            logger.info("Failed to fetch mb genre: %s", response.text)
            break

        data = response.json()
        genres.extend(data["genres"])

        # Check if we have fetched all genres
        if offset + limit >= data["genre-count"]:
            break

        offset += limit
        # mb only allow one request per second
        time.sleep(1)

    return genres
@celery.app.task(name="tags.update_musicbrainz_genre")
def update_musicbrainz_genre():
    """Create or update tags so that every MusicBrainz genre has a matching tag.

    Genres whose MusicBrainz id is already attached to a tag are skipped. For
    the others, the tag with the same name is created, or updated to carry
    the genre's MusicBrainz id.
    """
    # NOTE(review): this task does not read the
    # ``music__musicbrainz_genre_update`` preference -- confirm whether it should.
    genres = fetch_musicbrainz_genre()

    # ``values_list`` yields ``uuid.UUID`` objects while the MusicBrainz payload
    # carries plain strings, so normalise to str before comparing. A set also
    # makes each membership test O(1).
    known_mbids = {
        str(mbid)
        for mbid in models.Tag.objects.exclude(mbid__isnull=True).values_list(
            "mbid", flat=True
        )
    }

    for genre in genres:
        if genre["id"] in known_mbids:
            continue
        models.Tag.objects.update_or_create(
            name=genre["name"],
            defaults={"name": genre["name"], "mbid": genre["id"]},
        )
        # Guard against the same id showing up twice in the fetched list.
        known_mbids.add(genre["id"])
def sync_fw_item_tag_with_musicbrainz_tags(obj):
    """Attach the MusicBrainz tags of ``obj`` to it as Funkwhale tags.

    ``obj`` must be a ``Track``, ``Album`` or ``Artist`` with an ``mbid``; the
    matching MusicBrainz entity (recording, release or artist) is queried for
    its tags. For albums the tags of the release group are added as well.

    Raises:
        TypeError: if ``obj`` is not a Track, Album or Artist.
    """
    obj_type = obj.__class__.__name__

    # ``tag-list`` is read with ``.get`` throughout so that an entity without
    # tags yields an empty list instead of a KeyError.
    if obj_type == "Track":
        response = musicbrainz.api.recordings.get(id=obj.mbid, includes=["tags"])
        mb_tags = list(response["recording"].get("tag-list", []))
    elif obj_type == "Album":
        response = musicbrainz.api.releases.get(
            id=obj.mbid, includes=["tags", "release-groups"]
        )
        release = response["release"]
        mb_tags = list(release.get("tag-list", []))
        if group_mbid := release.get("release-group", {}).get("id"):
            group_response = musicbrainz.api.release_groups.get(
                id=group_mbid, includes=["tags"]
            )
            mb_tags.extend(group_response["release-group"].get("tag-list", []))
    elif obj_type == "Artist":
        response = musicbrainz.api.artists.get(id=obj.mbid, includes=["tags"])
        mb_tags = list(response["artist"].get("tag-list", []))
    else:
        raise TypeError(f"Cannot sync MusicBrainz tags for object type {obj_type}")

    # Release and release-group tags can overlap; keep the first occurrence only.
    tags = list(dict.fromkeys(t["name"] for t in mb_tags))
    if tags:
        models.add_tags(obj, *tags)

View File

@ -1,5 +1,6 @@
import django_filters.rest_framework import django_filters.rest_framework
from django.db.models import functions from django.db.models import functions
from django.db.models.functions import Collate
from rest_framework import viewsets from rest_framework import viewsets
from funkwhale_api.users.oauth import permissions as oauth_permissions from funkwhale_api.users.oauth import permissions as oauth_permissions
@ -12,6 +13,7 @@ class TagViewSet(viewsets.ReadOnlyModelViewSet):
queryset = ( queryset = (
models.Tag.objects.all() models.Tag.objects.all()
.annotate(__size=functions.Length("name")) .annotate(__size=functions.Length("name"))
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
.order_by("name") .order_by("name")
) )
serializer_class = serializers.TagSerializer serializer_class = serializers.TagSerializer

View File

@ -1,3 +1,5 @@
from django.db.models.functions import Collate
from funkwhale_api.tags import filters, models from funkwhale_api.tags import filters, models
@ -9,7 +11,11 @@ def test_filter_search_tag(factories, queryset_equal_list):
] ]
factories["tags.Tag"](name="TestTag") factories["tags.Tag"](name="TestTag")
factories["tags.Tag"](name="TestTag2") factories["tags.Tag"](name="TestTag2")
qs = models.Tag.objects.all().order_by("name") qs = (
models.Tag.objects.all()
.annotate(tag_deterministic=Collate("name", "und-x-icu"))
.order_by("name")
)
filterset = filters.TagFilter({"q": "tag1"}, queryset=qs) filterset = filters.TagFilter({"q": "tag1"}, queryset=qs)
assert filterset.qs == matches assert filterset.qs == matches

View File

@ -1,5 +1,5 @@
from funkwhale_api.music import models as music_models from funkwhale_api.music import models as music_models
from funkwhale_api.tags import tasks from funkwhale_api.tags import models, tasks
def test_get_tags_from_foreign_key(factories): def test_get_tags_from_foreign_key(factories):
@ -34,3 +34,53 @@ def test_add_tags_batch(factories):
) )
assert artist.get_tags() == ["Rap", "Rock"] assert artist.get_tags() == ["Rap", "Rock"]
def test_update_musicbrainz_genre(factories, mocker):
    """Genres fetched from MusicBrainz end up as tags carrying the right mbid."""
    tag1 = factories["tags.Tag"](mbid="2628c282-9075-4736-b1f9-7012404d09e8")
    tag2 = factories["tags.Tag"](mbid=None)
    # Two unrelated tags that the update must leave alone.
    for _ in range(2):
        factories["tags.Tag"]()

    # Genre name -> MusicBrainz id, in the order the mocked API returns them.
    expected_mbids = {
        "dnb": "aaaac282-9075-4736-b1f9-7012404daaaa",
        tag1.name: "2628c282-9075-4736-b1f9-7012404d09e8",
        tag2.name: "2628c282-9075-4736-b1f9-7012404daaaa",
    }
    mb_genre = [
        {"name": genre_name, "id": genre_mbid}
        for genre_name, genre_mbid in expected_mbids.items()
    ]
    mocker.patch(
        "funkwhale_api.tags.tasks.fetch_musicbrainz_genre", return_value=mb_genre
    )

    tasks.update_musicbrainz_genre()

    for genre_name, genre_mbid in expected_mbids.items():
        stored_tag = models.Tag.objects.get(name=genre_name)
        assert str(stored_tag.mbid) == genre_mbid
def test_sync_musicbrainz_tags(factories, mocker):
    """Tags returned by the mocked MusicBrainz API are attached to each object."""
    # (factory name, musicbrainz api attribute, mbid)
    cases = [
        ("music.Artist", "artists", "2628c282-9075-4736-b1f9-7012404daaaa"),
        ("music.Track", "recordings", "2628c282-9075-4736-b1f9-7012404daaaa"),
        ("music.Album", "releases", "2628c282-9075-4736-b1f9-7012404dacab"),
    ]
    # Create every object up front, before any API call is mocked.
    created = [
        (factories[factory_name](mbid=mbid), api_attr)
        for factory_name, api_attr, mbid in cases
    ]

    for obj, api_attr in created:
        # The payload key is the singular form of the api attribute.
        payload = {api_attr[:-1]: {"tag-list": [{"name": "Amazing Tag"}]}}
        mocker.patch(
            f"funkwhale_api.tags.tasks.musicbrainz.api.{api_attr}.get",
            return_value=payload,
        )

        tasks.sync_fw_item_tag_with_musicbrainz_tags(obj)
        obj.refresh_from_db()

        first_tagged_item = obj.tagged_items.all()[0]
        assert first_tagged_item.tag.name == "Amazing Tag"

View File

@ -0,0 +1 @@
Add MusicBrainz genres to the Funkwhale tag table and allow MusicBrainz tag sync (#2143)

View File

@ -58,7 +58,9 @@ const groups = computed(() => [
settings: [ settings: [
{ name: 'music__transcoding_enabled' }, { name: 'music__transcoding_enabled' },
{ name: 'music__transcoding_cache_duration' }, { name: 'music__transcoding_cache_duration' },
{ name: 'music__only_allow_musicbrainz_tagged_files' } { name: 'music__only_allow_musicbrainz_tagged_files' },
{ name: 'music__sync_musicbrainz_tags' },
{ name: 'music__musicbrainz_genre_update' }
] ]
}, },