357 lines
10 KiB
Python
357 lines
10 KiB
Python
import datetime
|
|
import logging
|
|
import mutagen
|
|
import pendulum
|
|
|
|
from django import forms
|
|
|
|
logger = logging.getLogger(__name__)
|
|
NODEFAULT = object()
|
|
|
|
|
|
class TagNotFound(KeyError):
|
|
pass
|
|
|
|
|
|
class UnsupportedTag(KeyError):
|
|
pass
|
|
|
|
|
|
class ParseError(ValueError):
|
|
pass
|
|
|
|
|
|
def get_id3_tag(f, k):
|
|
if k == "pictures":
|
|
return f.tags.getall("APIC")
|
|
# First we try to grab the standard key
|
|
possible_attributes = [("text", True), ("url", False)]
|
|
for attr, select_first in possible_attributes:
|
|
try:
|
|
v = getattr(f.tags[k], attr)
|
|
if select_first:
|
|
v = v[0]
|
|
return v
|
|
except KeyError:
|
|
break
|
|
except AttributeError:
|
|
continue
|
|
|
|
# then we fallback on parsing non standard tags
|
|
all_tags = f.tags.getall("TXXX")
|
|
try:
|
|
matches = [t for t in all_tags if t.desc.lower() == k.lower()]
|
|
return matches[0].text[0]
|
|
except (KeyError, IndexError):
|
|
raise TagNotFound(k)
|
|
|
|
|
|
def clean_id3_pictures(apic):
|
|
pictures = []
|
|
for p in list(apic):
|
|
pictures.append(
|
|
{
|
|
"mimetype": p.mime,
|
|
"content": p.data,
|
|
"description": p.desc,
|
|
"type": p.type.real,
|
|
}
|
|
)
|
|
return pictures
|
|
|
|
|
|
def get_flac_tag(f, k):
|
|
if k == "pictures":
|
|
return f.pictures
|
|
try:
|
|
return f.get(k, [])[0]
|
|
except (KeyError, IndexError):
|
|
raise TagNotFound(k)
|
|
|
|
|
|
def clean_flac_pictures(apic):
|
|
pictures = []
|
|
for p in list(apic):
|
|
pictures.append(
|
|
{
|
|
"mimetype": p.mime,
|
|
"content": p.data,
|
|
"description": p.desc,
|
|
"type": p.type.real,
|
|
}
|
|
)
|
|
return pictures
|
|
|
|
|
|
def get_mp3_recording_id(f, k):
|
|
try:
|
|
return [t for t in f.tags.getall("UFID") if "musicbrainz.org" in t.owner][
|
|
0
|
|
].data.decode("utf-8")
|
|
except IndexError:
|
|
raise TagNotFound(k)
|
|
|
|
|
|
def convert_position(v):
|
|
try:
|
|
return int(v)
|
|
except ValueError:
|
|
# maybe the position is of the form "1/4"
|
|
pass
|
|
|
|
try:
|
|
return int(v.split("/")[0])
|
|
except (ValueError, AttributeError, IndexError):
|
|
pass
|
|
|
|
|
|
class FirstUUIDField(forms.UUIDField):
|
|
def to_python(self, value):
|
|
try:
|
|
# sometimes, Picard leaves two uuids in the field, separated
|
|
# by a slash or a ;
|
|
value = value.split(";")[0].split("/")[0].strip()
|
|
except (AttributeError, IndexError, TypeError):
|
|
pass
|
|
|
|
return super().to_python(value)
|
|
|
|
|
|
def get_date(value):
|
|
ADDITIONAL_FORMATS = ["%Y-%d-%m %H:%M"] # deezer date format
|
|
try:
|
|
parsed = pendulum.parse(str(value))
|
|
return datetime.date(parsed.year, parsed.month, parsed.day)
|
|
except pendulum.exceptions.ParserError:
|
|
pass
|
|
|
|
for date_format in ADDITIONAL_FORMATS:
|
|
try:
|
|
parsed = datetime.datetime.strptime(value, date_format)
|
|
except ValueError:
|
|
continue
|
|
else:
|
|
return datetime.date(parsed.year, parsed.month, parsed.day)
|
|
|
|
raise ParseError("{} cannot be parsed as a date".format(value))
|
|
|
|
|
|
def split_and_return_first(separator):
|
|
def inner(v):
|
|
return v.split(separator)[0].strip()
|
|
|
|
return inner
|
|
|
|
|
|
VALIDATION = {
|
|
"musicbrainz_artistid": FirstUUIDField(),
|
|
"musicbrainz_albumid": FirstUUIDField(),
|
|
"musicbrainz_recordingid": FirstUUIDField(),
|
|
"musicbrainz_albumartistid": FirstUUIDField(),
|
|
}
|
|
|
|
CONF = {
|
|
"OggOpus": {
|
|
"getter": lambda f, k: f[k][0],
|
|
"fields": {
|
|
"track_number": {
|
|
"field": "TRACKNUMBER",
|
|
"to_application": convert_position,
|
|
},
|
|
"disc_number": {"field": "DISCNUMBER", "to_application": convert_position},
|
|
"title": {},
|
|
"artist": {},
|
|
"album_artist": {
|
|
"field": "albumartist",
|
|
"to_application": split_and_return_first(";"),
|
|
},
|
|
"album": {},
|
|
"date": {"field": "date", "to_application": get_date},
|
|
"musicbrainz_albumid": {},
|
|
"musicbrainz_artistid": {},
|
|
"musicbrainz_albumartistid": {},
|
|
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
|
|
"license": {},
|
|
"copyright": {},
|
|
},
|
|
},
|
|
"OggVorbis": {
|
|
"getter": lambda f, k: f[k][0],
|
|
"fields": {
|
|
"track_number": {
|
|
"field": "TRACKNUMBER",
|
|
"to_application": convert_position,
|
|
},
|
|
"disc_number": {"field": "DISCNUMBER", "to_application": convert_position},
|
|
"title": {},
|
|
"artist": {},
|
|
"album_artist": {
|
|
"field": "albumartist",
|
|
"to_application": split_and_return_first(";"),
|
|
},
|
|
"album": {},
|
|
"date": {"field": "date", "to_application": get_date},
|
|
"musicbrainz_albumid": {},
|
|
"musicbrainz_artistid": {},
|
|
"musicbrainz_albumartistid": {},
|
|
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
|
|
"license": {},
|
|
"copyright": {},
|
|
},
|
|
},
|
|
"OggTheora": {
|
|
"getter": lambda f, k: f[k][0],
|
|
"fields": {
|
|
"track_number": {
|
|
"field": "TRACKNUMBER",
|
|
"to_application": convert_position,
|
|
},
|
|
"disc_number": {"field": "DISCNUMBER", "to_application": convert_position},
|
|
"title": {},
|
|
"artist": {},
|
|
"album_artist": {"field": "albumartist"},
|
|
"album": {},
|
|
"date": {"field": "date", "to_application": get_date},
|
|
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
|
|
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
|
|
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
|
|
"musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
|
|
# somehow, I cannot successfully create an ogg theora file
|
|
# with the proper license field
|
|
# "license": {"field": "license"},
|
|
},
|
|
},
|
|
"MP3": {
|
|
"getter": get_id3_tag,
|
|
"clean_pictures": clean_id3_pictures,
|
|
"fields": {
|
|
"track_number": {"field": "TRCK", "to_application": convert_position},
|
|
"disc_number": {"field": "TPOS", "to_application": convert_position},
|
|
"title": {"field": "TIT2"},
|
|
"artist": {"field": "TPE1"},
|
|
"album_artist": {"field": "TPE2"},
|
|
"album": {"field": "TALB"},
|
|
"date": {"field": "TDRC", "to_application": get_date},
|
|
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
|
|
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
|
|
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
|
|
"musicbrainz_recordingid": {
|
|
"field": "UFID",
|
|
"getter": get_mp3_recording_id,
|
|
},
|
|
"pictures": {},
|
|
"license": {"field": "WCOP"},
|
|
"copyright": {"field": "TCOP"},
|
|
},
|
|
},
|
|
"FLAC": {
|
|
"getter": get_flac_tag,
|
|
"clean_pictures": clean_flac_pictures,
|
|
"fields": {
|
|
"track_number": {
|
|
"field": "tracknumber",
|
|
"to_application": convert_position,
|
|
},
|
|
"disc_number": {"field": "discnumber", "to_application": convert_position},
|
|
"title": {},
|
|
"artist": {},
|
|
"album_artist": {"field": "albumartist"},
|
|
"album": {},
|
|
"date": {"field": "date", "to_application": get_date},
|
|
"musicbrainz_albumid": {},
|
|
"musicbrainz_artistid": {},
|
|
"musicbrainz_albumartistid": {},
|
|
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
|
|
"test": {},
|
|
"pictures": {},
|
|
"license": {},
|
|
"copyright": {},
|
|
},
|
|
},
|
|
}
|
|
|
|
ALL_FIELDS = [
|
|
"track_number",
|
|
"disc_number",
|
|
"title",
|
|
"artist",
|
|
"album_artist",
|
|
"album",
|
|
"date",
|
|
"musicbrainz_albumid",
|
|
"musicbrainz_artistid",
|
|
"musicbrainz_albumartistid",
|
|
"musicbrainz_recordingid",
|
|
"license",
|
|
"copyright",
|
|
]
|
|
|
|
|
|
class Metadata(object):
|
|
def __init__(self, path):
|
|
self._file = mutagen.File(path)
|
|
if self._file is None:
|
|
raise ValueError("Cannot parse metadata from {}".format(path))
|
|
ft = self.get_file_type(self._file)
|
|
try:
|
|
self._conf = CONF[ft]
|
|
except KeyError:
|
|
raise ValueError("Unsupported format {}".format(ft))
|
|
|
|
def get_file_type(self, f):
|
|
return f.__class__.__name__
|
|
|
|
def get(self, key, default=NODEFAULT):
|
|
try:
|
|
field_conf = self._conf["fields"][key]
|
|
except KeyError:
|
|
raise UnsupportedTag("{} is not supported for this file format".format(key))
|
|
real_key = field_conf.get("field", key)
|
|
try:
|
|
getter = field_conf.get("getter", self._conf["getter"])
|
|
v = getter(self._file, real_key)
|
|
except KeyError:
|
|
if default == NODEFAULT:
|
|
raise TagNotFound(real_key)
|
|
return default
|
|
|
|
converter = field_conf.get("to_application")
|
|
if converter:
|
|
v = converter(v)
|
|
field = VALIDATION.get(key)
|
|
if field:
|
|
v = field.to_python(v)
|
|
return v
|
|
|
|
def all(self, ignore_parse_errors=True):
|
|
"""
|
|
Return a dict containing all metadata of the file
|
|
"""
|
|
|
|
data = {}
|
|
for field in ALL_FIELDS:
|
|
try:
|
|
data[field] = self.get(field, None)
|
|
except (TagNotFound, forms.ValidationError):
|
|
data[field] = None
|
|
except ParseError as e:
|
|
if not ignore_parse_errors:
|
|
raise
|
|
logger.warning("Unparsable field {}: {}".format(field, str(e)))
|
|
data[field] = None
|
|
|
|
return data
|
|
|
|
def get_picture(self, picture_type="cover_front"):
|
|
ptype = getattr(mutagen.id3.PictureType, picture_type.upper())
|
|
try:
|
|
pictures = self.get("pictures")
|
|
except (UnsupportedTag, TagNotFound):
|
|
return
|
|
|
|
cleaner = self._conf.get("clean_pictures", lambda v: v)
|
|
pictures = cleaner(pictures)
|
|
for p in pictures:
|
|
if p["type"] == ptype:
|
|
return p
|