From 8e84e2bf39c09aa960ac990674cd47c8464a0e33 Mon Sep 17 00:00:00 2001 From: Eliot Berriot Date: Mon, 26 Nov 2018 17:07:55 +0100 Subject: [PATCH] Fix #622: More resilient date parsing during audio import, will not crash anymore on invalid dates --- api/funkwhale_api/music/metadata.py | 32 ++++++++++++++++++++++++++--- api/tests/music/test_metadata.py | 26 ++++++++++++++++++++++- changes/changelog.d/622.enhancement | 2 ++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 changes/changelog.d/622.enhancement diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py index 21daf2747..112782e41 100644 --- a/api/funkwhale_api/music/metadata.py +++ b/api/funkwhale_api/music/metadata.py @@ -1,8 +1,11 @@ import datetime +import logging import mutagen import pendulum + from django import forms +logger = logging.getLogger(__name__) NODEFAULT = object() @@ -14,6 +17,10 @@ class UnsupportedTag(KeyError): pass +class ParseError(ValueError): + pass + + def get_id3_tag(f, k): if k == "pictures": return f.tags.getall("APIC") @@ -103,8 +110,22 @@ class FirstUUIDField(forms.UUIDField): def get_date(value): - parsed = pendulum.parse(str(value)) - return datetime.date(parsed.year, parsed.month, parsed.day) + ADDITIONAL_FORMATS = ["%Y-%d-%m %H:%M"] # deezer date format + try: + parsed = pendulum.parse(str(value)) + return datetime.date(parsed.year, parsed.month, parsed.day) + except pendulum.exceptions.ParserError: + pass + + for date_format in ADDITIONAL_FORMATS: + try: + parsed = datetime.datetime.strptime(value, date_format) + except ValueError: + continue + else: + return datetime.date(parsed.year, parsed.month, parsed.day) + + raise ParseError("{} cannot be parsed as a date".format(value)) def split_and_return_first(separator): @@ -275,7 +296,7 @@ class Metadata(object): v = field.to_python(v) return v - def all(self): + def all(self, ignore_parse_errors=True): """ Return a dict containing all metadata of the file """ @@ -286,6 +307,11 @@ class Metadata(object): data[field] = self.get(field, None) except (TagNotFound, forms.ValidationError): data[field] = None + except ParseError as e: + if not ignore_parse_errors: + raise + logger.warning("Unparsable field {}: {}".format(field, str(e))) + data[field] = None return data diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py index 82c991c0b..e386c41fc 100644 --- a/api/tests/music/test_metadata.py +++ b/api/tests/music/test_metadata.py @@ -196,7 +196,31 @@ def test_mbid_clean_keeps_only_first(field_name): @pytest.mark.parametrize( "raw,expected", - [("2017", datetime.date(2017, 1, 1)), ("2017-12-31", datetime.date(2017, 12, 31))], + [ + ("2017", datetime.date(2017, 1, 1)), + ("2017-12-31", datetime.date(2017, 12, 31)), + ("2017-14-01 01:32", datetime.date(2017, 1, 14)), # deezer format + ], ) def test_date_parsing(raw, expected): assert metadata.get_date(raw) == expected + + +def test_date_parsing_failure(): + with pytest.raises(metadata.ParseError): + metadata.get_date("noop") + + +def test_metadata_all_ignore_parse_errors_true(mocker): + path = os.path.join(DATA_DIR, "sample.flac") + data = metadata.Metadata(path) + mocker.patch.object(data, "get", side_effect=metadata.ParseError("Failure")) + assert data.all()["date"] is None + + +def test_metadata_all_ignore_parse_errors_false(mocker): + path = os.path.join(DATA_DIR, "sample.flac") + data = metadata.Metadata(path) + mocker.patch.object(data, "get", side_effect=metadata.ParseError("Failure")) + with pytest.raises(metadata.ParseError): + data.all(ignore_parse_errors=False) diff --git a/changes/changelog.d/622.enhancement b/changes/changelog.d/622.enhancement new file mode 100644 index 000000000..dafebf44f --- /dev/null +++ b/changes/changelog.d/622.enhancement @@ -0,0 +1,2 @@ +More resilient date parsing during audio import, will not crash anymore on +invalid dates (#622)