Fix #376: Smarter date parsing during import by replacing arrow with pendulum

This commit is contained in:
Eliot Berriot 2018-07-24 23:05:18 +02:00
parent 8974881f95
commit 4781e7821e
No known key found for this signature in database
GPG Key ID: DD6965E2476E5C27
7 changed files with 28 additions and 22 deletions

View File

@ -1,5 +1,6 @@
import arrow
import datetime
import mutagen
import pendulum
from django import forms
NODEFAULT = object()
@ -101,6 +102,11 @@ class FirstUUIDField(forms.UUIDField):
return super().to_python(value)
def get_date(value):
parsed = pendulum.parse(str(value))
return datetime.date(parsed.year, parsed.month, parsed.day)
VALIDATION = {
"musicbrainz_artistid": FirstUUIDField(),
"musicbrainz_albumid": FirstUUIDField(),
@ -118,7 +124,7 @@ CONF = {
"title": {},
"artist": {},
"album": {},
"date": {"field": "date", "to_application": lambda v: arrow.get(v).date()},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {},
"musicbrainz_artistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
@ -134,7 +140,7 @@ CONF = {
"title": {},
"artist": {},
"album": {},
"date": {"field": "date", "to_application": lambda v: arrow.get(v).date()},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
"musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
@ -148,10 +154,7 @@ CONF = {
"title": {"field": "TIT2"},
"artist": {"field": "TPE1"},
"album": {"field": "TALB"},
"date": {
"field": "TDRC",
"to_application": lambda v: arrow.get(str(v)).date(),
},
"date": {"field": "TDRC", "to_application": get_date},
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
"musicbrainz_recordingid": {
@ -172,10 +175,7 @@ CONF = {
"title": {},
"artist": {},
"album": {},
"date": {
"field": "date",
"to_application": lambda v: arrow.get(str(v)).date(),
},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {},
"musicbrainz_artistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},

View File

@ -1,11 +1,10 @@
import datetime
import os
import shutil
import tempfile
import uuid
import arrow
import markdown
import pendulum
from django.conf import settings
from django.core.files import File
from django.core.files.base import ContentFile
@ -125,9 +124,7 @@ def import_artist(v):
def parse_date(v):
if len(v) == 4:
return datetime.date(int(v), 1, 1)
d = arrow.get(v).date()
d = pendulum.parse(v).date()
return d

View File

@ -35,7 +35,7 @@ djangorestframework>=3.7,<3.8
djangorestframework-jwt>=1.11,<1.12
oauth2client<4
google-api-python-client>=1.6,<1.7
arrow>=0.12,<0.13
pendulum>=2,<3
persisting-theory>=0.2,<0.3
django-versatileimagefield>=1.9,<1.10
django-filter>=1.1,<1.2

View File

@ -1,4 +1,4 @@
import arrow
import pendulum
import pytest
from django.urls import reverse
from django.utils import timezone
@ -455,7 +455,7 @@ def test_library_actor_handle_create_audio(mocker, factories):
assert lt.title == a["metadata"]["recording"]["title"]
assert lt.artist_name == a["metadata"]["artist"]["name"]
assert lt.album_title == a["metadata"]["release"]["title"]
assert lt.published_date == arrow.get(a["published"])
assert lt.published_date == pendulum.parse(a["published"])
def test_library_actor_handle_create_audio_autoimport(mocker, factories):
@ -494,7 +494,7 @@ def test_library_actor_handle_create_audio_autoimport(mocker, factories):
assert lt.title == a["metadata"]["recording"]["title"]
assert lt.artist_name == a["metadata"]["artist"]["name"]
assert lt.album_title == a["metadata"]["release"]["title"]
assert lt.published_date == arrow.get(a["published"])
assert lt.published_date == pendulum.parse(a["published"])
batch = music_models.ImportBatch.objects.latest("id")

View File

@ -1,4 +1,4 @@
import arrow
import pendulum
import pytest
from django.core.paginator import Paginator
@ -492,7 +492,7 @@ def test_activity_pub_audio_serializer_to_library_track(factories):
assert lt.title == audio["metadata"]["recording"]["title"]
assert lt.artist_name == audio["metadata"]["artist"]["name"]
assert lt.album_title == audio["metadata"]["release"]["title"]
assert lt.published_date == arrow.get(audio["published"])
assert lt.published_date == pendulum.parse(audio["published"])
def test_activity_pub_audio_serializer_to_library_track_no_duplicate(factories):

View File

@ -122,3 +122,11 @@ def test_mbid_clean_keeps_only_first(field_name):
result = field.to_python("/".join([u1, u2]))
assert str(result) == u1
@pytest.mark.parametrize(
"raw,expected",
[("2017", datetime.date(2017, 1, 1)), ("2017-12-31", datetime.date(2017, 12, 31))],
)
def test_date_parsing(raw, expected):
assert metadata.get_date(raw) == expected

View File

@ -0,0 +1 @@
Smarter date parsing during import by replacing arrow with pendulum (#376)