See #432: tags acquisition (from audio files)
This commit is contained in:
parent
8ee5578693
commit
7bc8109732
|
@ -2,6 +2,7 @@ import base64
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import pendulum
|
import pendulum
|
||||||
|
import re
|
||||||
|
|
||||||
import mutagen._util
|
import mutagen._util
|
||||||
import mutagen.oggtheora
|
import mutagen.oggtheora
|
||||||
|
@ -144,6 +145,7 @@ CONF = {
|
||||||
"mbid": {"field": "musicbrainz_trackid"},
|
"mbid": {"field": "musicbrainz_trackid"},
|
||||||
"license": {},
|
"license": {},
|
||||||
"copyright": {},
|
"copyright": {},
|
||||||
|
"genre": {},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"OggVorbis": {
|
"OggVorbis": {
|
||||||
|
@ -162,6 +164,7 @@ CONF = {
|
||||||
"mbid": {"field": "musicbrainz_trackid"},
|
"mbid": {"field": "musicbrainz_trackid"},
|
||||||
"license": {},
|
"license": {},
|
||||||
"copyright": {},
|
"copyright": {},
|
||||||
|
"genre": {},
|
||||||
"pictures": {
|
"pictures": {
|
||||||
"field": "metadata_block_picture",
|
"field": "metadata_block_picture",
|
||||||
"to_application": clean_ogg_pictures,
|
"to_application": clean_ogg_pictures,
|
||||||
|
@ -184,6 +187,7 @@ CONF = {
|
||||||
"mbid": {"field": "MusicBrainz Track Id"},
|
"mbid": {"field": "MusicBrainz Track Id"},
|
||||||
"license": {},
|
"license": {},
|
||||||
"copyright": {},
|
"copyright": {},
|
||||||
|
"genre": {},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"MP3": {
|
"MP3": {
|
||||||
|
@ -199,6 +203,7 @@ CONF = {
|
||||||
"date": {"field": "TDRC"},
|
"date": {"field": "TDRC"},
|
||||||
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
|
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
|
||||||
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
|
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
|
||||||
|
"genre": {"field": "TCON"},
|
||||||
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
|
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
|
||||||
"mbid": {"field": "UFID", "getter": get_mp3_recording_id},
|
"mbid": {"field": "UFID", "getter": get_mp3_recording_id},
|
||||||
"pictures": {},
|
"pictures": {},
|
||||||
|
@ -220,6 +225,7 @@ CONF = {
|
||||||
"musicbrainz_albumid": {},
|
"musicbrainz_albumid": {},
|
||||||
"musicbrainz_artistid": {},
|
"musicbrainz_artistid": {},
|
||||||
"musicbrainz_albumartistid": {},
|
"musicbrainz_albumartistid": {},
|
||||||
|
"genre": {},
|
||||||
"mbid": {"field": "musicbrainz_trackid"},
|
"mbid": {"field": "musicbrainz_trackid"},
|
||||||
"test": {},
|
"test": {},
|
||||||
"pictures": {},
|
"pictures": {},
|
||||||
|
@ -485,6 +491,61 @@ class PermissiveDateField(serializers.CharField):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
TAG_REGEX = re.compile(r"^((\w+)([\d_]*))$")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_tags_from_genre(string):
|
||||||
|
tags = []
|
||||||
|
delimiter = "@@@@@"
|
||||||
|
for d in [" - ", ",", ";", "/"]:
|
||||||
|
# Replace common tags separators by a custom delimiter
|
||||||
|
string = string.replace(d, delimiter)
|
||||||
|
|
||||||
|
# loop on the parts (splitting on our custom delimiter)
|
||||||
|
for tag in string.split(delimiter):
|
||||||
|
tag = tag.strip()
|
||||||
|
for d in ["-"]:
|
||||||
|
# preparation for replacement so that Pop-Rock becomes Pop Rock, then PopRock
|
||||||
|
# (step 1, step 2 happens below)
|
||||||
|
tag = tag.replace(d, " ")
|
||||||
|
if not tag:
|
||||||
|
continue
|
||||||
|
final_tag = ""
|
||||||
|
if not TAG_REGEX.match(tag.replace(" ", "")):
|
||||||
|
# the string contains some non words chars ($, €, etc.), right now
|
||||||
|
# we simply skip such tags
|
||||||
|
continue
|
||||||
|
# concatenate the parts and uppercase them so that 'pop rock' becomes 'PopRock'
|
||||||
|
if len(tag.split(" ")) == 1:
|
||||||
|
# we append the tag "as is", because it doesn't contain any space
|
||||||
|
tags.append(tag)
|
||||||
|
continue
|
||||||
|
for part in tag.split(" "):
|
||||||
|
# the tag contains space, there's work to do to have consistent case
|
||||||
|
# 'pop rock' -> 'PopRock'
|
||||||
|
# (step 2)
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
final_tag += part[0].upper() + part[1:]
|
||||||
|
if final_tag:
|
||||||
|
tags.append(final_tag)
|
||||||
|
return tags
|
||||||
|
|
||||||
|
|
||||||
|
class TagsField(serializers.CharField):
|
||||||
|
def get_value(self, data):
|
||||||
|
return data
|
||||||
|
|
||||||
|
def to_internal_value(self, data):
|
||||||
|
try:
|
||||||
|
value = data.get("genre") or ""
|
||||||
|
except TagNotFound:
|
||||||
|
return []
|
||||||
|
value = super().to_internal_value(str(value))
|
||||||
|
|
||||||
|
return extract_tags_from_genre(value)
|
||||||
|
|
||||||
|
|
||||||
class MBIDField(serializers.UUIDField):
|
class MBIDField(serializers.UUIDField):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
kwargs.setdefault("allow_null", True)
|
kwargs.setdefault("allow_null", True)
|
||||||
|
@ -533,6 +594,7 @@ class TrackMetadataSerializer(serializers.Serializer):
|
||||||
copyright = serializers.CharField(allow_blank=True, allow_null=True, required=False)
|
copyright = serializers.CharField(allow_blank=True, allow_null=True, required=False)
|
||||||
license = serializers.CharField(allow_blank=True, allow_null=True, required=False)
|
license = serializers.CharField(allow_blank=True, allow_null=True, required=False)
|
||||||
mbid = MBIDField()
|
mbid = MBIDField()
|
||||||
|
tags = TagsField(allow_blank=True, allow_null=True, required=False)
|
||||||
|
|
||||||
album = AlbumField()
|
album = AlbumField()
|
||||||
artists = ArtistField()
|
artists = ArtistField()
|
||||||
|
@ -544,6 +606,7 @@ class TrackMetadataSerializer(serializers.Serializer):
|
||||||
"position",
|
"position",
|
||||||
"disc_number",
|
"disc_number",
|
||||||
"mbid",
|
"mbid",
|
||||||
|
"tags",
|
||||||
]
|
]
|
||||||
|
|
||||||
def validate(self, validated_data):
|
def validate(self, validated_data):
|
||||||
|
@ -553,7 +616,7 @@ class TrackMetadataSerializer(serializers.Serializer):
|
||||||
v = validated_data[field]
|
v = validated_data[field]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
continue
|
continue
|
||||||
if v in ["", None]:
|
if v in ["", None, []]:
|
||||||
validated_data.pop(field)
|
validated_data.pop(field)
|
||||||
return validated_data
|
return validated_data
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ from requests.exceptions import RequestException
|
||||||
from funkwhale_api.common import channels, preferences
|
from funkwhale_api.common import channels, preferences
|
||||||
from funkwhale_api.federation import routes
|
from funkwhale_api.federation import routes
|
||||||
from funkwhale_api.federation import library as lb
|
from funkwhale_api.federation import library as lb
|
||||||
|
from funkwhale_api.tags import models as tags_models
|
||||||
from funkwhale_api.taskapp import celery
|
from funkwhale_api.taskapp import celery
|
||||||
|
|
||||||
from . import licenses
|
from . import licenses
|
||||||
|
@ -541,10 +542,12 @@ def _get_track(data, attributed_to=None):
|
||||||
if data.get("fdate"):
|
if data.get("fdate"):
|
||||||
defaults["creation_date"] = data.get("fdate")
|
defaults["creation_date"] = data.get("fdate")
|
||||||
|
|
||||||
track = get_best_candidate_or_create(
|
track, created = get_best_candidate_or_create(
|
||||||
models.Track, query, defaults=defaults, sort_fields=["mbid", "fid"]
|
models.Track, query, defaults=defaults, sort_fields=["mbid", "fid"]
|
||||||
)[0]
|
)
|
||||||
|
|
||||||
|
if created:
|
||||||
|
tags_models.add_tags(track, *data.get("tags", []))
|
||||||
return track
|
return track
|
||||||
|
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -57,6 +57,7 @@ def test_can_get_metadata_all():
|
||||||
"musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f",
|
"musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f",
|
||||||
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
|
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
|
||||||
"copyright": "Someone",
|
"copyright": "Someone",
|
||||||
|
"genre": "Classical",
|
||||||
}
|
}
|
||||||
assert data.all() == expected
|
assert data.all() == expected
|
||||||
|
|
||||||
|
@ -249,6 +250,7 @@ def test_metadata_fallback_ogg_theora(mocker):
|
||||||
"mbid": uuid.UUID("f269d497-1cc0-4ae4-a0c4-157ec7d73fcb"),
|
"mbid": uuid.UUID("f269d497-1cc0-4ae4-a0c4-157ec7d73fcb"),
|
||||||
"license": "https://creativecommons.org/licenses/by-nc-nd/2.5/",
|
"license": "https://creativecommons.org/licenses/by-nc-nd/2.5/",
|
||||||
"copyright": "Someone",
|
"copyright": "Someone",
|
||||||
|
"tags": ["Funk"],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
@ -281,6 +283,7 @@ def test_metadata_fallback_ogg_theora(mocker):
|
||||||
"mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"),
|
"mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"),
|
||||||
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
|
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
|
||||||
"copyright": "Someone",
|
"copyright": "Someone",
|
||||||
|
"tags": ["Classical"],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
@ -313,6 +316,7 @@ def test_metadata_fallback_ogg_theora(mocker):
|
||||||
"mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"),
|
"mbid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"),
|
||||||
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
|
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
|
||||||
"copyright": "Someone",
|
"copyright": "Someone",
|
||||||
|
"tags": ["Classical"],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
@ -336,6 +340,7 @@ def test_metadata_fallback_ogg_theora(mocker):
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
"tags": ["Rock"],
|
||||||
"position": 1,
|
"position": 1,
|
||||||
"disc_number": 1,
|
"disc_number": 1,
|
||||||
"mbid": uuid.UUID("124d0150-8627-46bc-bc14-789a3bc960c8"),
|
"mbid": uuid.UUID("124d0150-8627-46bc-bc14-789a3bc960c8"),
|
||||||
|
@ -371,6 +376,7 @@ def test_metadata_fallback_ogg_theora(mocker):
|
||||||
"mbid": uuid.UUID("30f3f33e-8d0c-4e69-8539-cbd701d18f28"),
|
"mbid": uuid.UUID("30f3f33e-8d0c-4e69-8539-cbd701d18f28"),
|
||||||
"license": "http://creativecommons.org/licenses/by-nc-sa/3.0/us/",
|
"license": "http://creativecommons.org/licenses/by-nc-sa/3.0/us/",
|
||||||
"copyright": "2008 nin",
|
"copyright": "2008 nin",
|
||||||
|
"tags": ["Industrial"],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
|
@ -607,3 +613,43 @@ def test_artist_field_featuring():
|
||||||
value = field.get_value(data)
|
value = field.get_value(data)
|
||||||
|
|
||||||
assert field.to_internal_value(value) == expected
|
assert field.to_internal_value(value) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"genre, expected_tags",
|
||||||
|
[
|
||||||
|
("Pop", ["Pop"]),
|
||||||
|
("pop", ["pop"]),
|
||||||
|
("Pop-Rock", ["PopRock"]),
|
||||||
|
("Pop - Rock", ["Pop", "Rock"]),
|
||||||
|
("Soundtrack - Cute Anime", ["Soundtrack", "CuteAnime"]),
|
||||||
|
("Pop, Rock", ["Pop", "Rock"]),
|
||||||
|
("Chanson française", ["ChansonFrançaise"]),
|
||||||
|
("Unhandled❤️", []),
|
||||||
|
("tag with non-breaking spaces", []),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_acquire_tags_from_genre(genre, expected_tags):
|
||||||
|
data = {
|
||||||
|
"title": "Track Title",
|
||||||
|
"artist": "Track Artist",
|
||||||
|
"album": "Track Album",
|
||||||
|
"genre": genre,
|
||||||
|
}
|
||||||
|
expected = {
|
||||||
|
"title": "Track Title",
|
||||||
|
"artists": [{"name": "Track Artist", "mbid": None}],
|
||||||
|
"album": {
|
||||||
|
"title": "Track Album",
|
||||||
|
"mbid": None,
|
||||||
|
"release_date": None,
|
||||||
|
"artists": [],
|
||||||
|
},
|
||||||
|
"cover_data": None,
|
||||||
|
}
|
||||||
|
if expected_tags:
|
||||||
|
expected["tags"] = expected_tags
|
||||||
|
|
||||||
|
serializer = metadata.TrackMetadataSerializer(data=metadata.FakeMetadata(data))
|
||||||
|
assert serializer.is_valid(raise_exception=True) is True
|
||||||
|
assert serializer.validated_data == expected
|
||||||
|
|
|
@ -448,7 +448,7 @@ def test_get_audio_data(factories):
|
||||||
|
|
||||||
result = upload.get_audio_data()
|
result = upload.get_audio_data()
|
||||||
|
|
||||||
assert result == {"duration": 1, "bitrate": 112000, "size": 14858}
|
assert result == {"duration": 1, "bitrate": 112000, "size": 15918}
|
||||||
|
|
||||||
|
|
||||||
def test_library_queryset_with_follows(factories):
|
def test_library_queryset_with_follows(factories):
|
||||||
|
|
|
@ -18,6 +18,7 @@ DATA_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def test_can_create_track_from_file_metadata_no_mbid(db, mocker):
|
def test_can_create_track_from_file_metadata_no_mbid(db, mocker):
|
||||||
|
add_tags = mocker.patch("funkwhale_api.tags.models.add_tags")
|
||||||
metadata = {
|
metadata = {
|
||||||
"title": "Test track",
|
"title": "Test track",
|
||||||
"artists": [{"name": "Test artist"}],
|
"artists": [{"name": "Test artist"}],
|
||||||
|
@ -26,6 +27,7 @@ def test_can_create_track_from_file_metadata_no_mbid(db, mocker):
|
||||||
"disc_number": 2,
|
"disc_number": 2,
|
||||||
"license": "Hello world: http://creativecommons.org/licenses/by-sa/4.0/",
|
"license": "Hello world: http://creativecommons.org/licenses/by-sa/4.0/",
|
||||||
"copyright": "2018 Someone",
|
"copyright": "2018 Someone",
|
||||||
|
"tags": ["Punk", "Rock"],
|
||||||
}
|
}
|
||||||
match_license = mocker.spy(licenses, "match")
|
match_license = mocker.spy(licenses, "match")
|
||||||
|
|
||||||
|
@ -44,6 +46,7 @@ def test_can_create_track_from_file_metadata_no_mbid(db, mocker):
|
||||||
assert track.artist.mbid is None
|
assert track.artist.mbid is None
|
||||||
assert track.artist.attributed_to is None
|
assert track.artist.attributed_to is None
|
||||||
match_license.assert_called_once_with(metadata["license"], metadata["copyright"])
|
match_license.assert_called_once_with(metadata["license"], metadata["copyright"])
|
||||||
|
add_tags.assert_called_once_with(track, *metadata["tags"])
|
||||||
|
|
||||||
|
|
||||||
def test_can_create_track_from_file_metadata_attributed_to(factories, mocker):
|
def test_can_create_track_from_file_metadata_attributed_to(factories, mocker):
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue