Now use import job everywhere, even for direct file imports

This commit is contained in:
Eliot Berriot 2017-12-27 20:29:26 +01:00
parent 5d2dbbc828
commit 2e616282fd
No known key found for this signature in database
GPG Key ID: DD6965E2476E5C27
9 changed files with 318 additions and 66 deletions

View File

@ -72,6 +72,14 @@ class ImportJobFactory(factory.django.DjangoModelFactory):
model = 'music.ImportJob' model = 'music.ImportJob'
@registry.register(name='music.FileImportJob')
class FileImportJobFactory(ImportJobFactory):
source = 'file://'
mbid = None
audio_file = factory.django.FileField(
from_path=os.path.join(SAMPLES_PATH, 'test.ogg'))
@registry.register @registry.register
class WorkFactory(factory.django.DjangoModelFactory): class WorkFactory(factory.django.DjangoModelFactory):
mbid = factory.Faker('uuid4') mbid = factory.Faker('uuid4')

View File

@ -0,0 +1,28 @@
# Generated by Django 2.0 on 2017-12-27 17:28
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('music', '0016_trackfile_acoustid_track_id'),
]
operations = [
migrations.AddField(
model_name='importbatch',
name='source',
field=models.CharField(choices=[('api', 'api'), ('shell', 'shell')], default='api', max_length=30),
),
migrations.AddField(
model_name='importjob',
name='audio_file',
field=models.FileField(blank=True, max_length=255, null=True, upload_to='imports/%Y/%m/%d'),
),
migrations.AlterField(
model_name='importjob',
name='mbid',
field=models.UUIDField(blank=True, editable=False, null=True),
),
]

View File

@ -384,9 +384,17 @@ class TrackFile(models.Model):
class ImportBatch(models.Model): class ImportBatch(models.Model):
IMPORT_BATCH_SOURCES = [
('api', 'api'),
('shell', 'shell')
]
source = models.CharField(
max_length=30, default='api', choices=IMPORT_BATCH_SOURCES)
creation_date = models.DateTimeField(default=timezone.now) creation_date = models.DateTimeField(default=timezone.now)
submitted_by = models.ForeignKey( submitted_by = models.ForeignKey(
'users.User', related_name='imports', on_delete=models.CASCADE) 'users.User',
related_name='imports',
on_delete=models.CASCADE)
class Meta: class Meta:
ordering = ['-creation_date'] ordering = ['-creation_date']
@ -411,12 +419,16 @@ class ImportJob(models.Model):
blank=True, blank=True,
on_delete=models.CASCADE) on_delete=models.CASCADE)
source = models.URLField() source = models.URLField()
mbid = models.UUIDField(editable=False) mbid = models.UUIDField(editable=False, null=True, blank=True)
STATUS_CHOICES = ( STATUS_CHOICES = (
('pending', 'Pending'), ('pending', 'Pending'),
('finished', 'finished'), ('finished', 'Finished'),
('errored', 'Errored'),
('skipped', 'Skipped'),
) )
status = models.CharField(choices=STATUS_CHOICES, default='pending', max_length=30) status = models.CharField(choices=STATUS_CHOICES, default='pending', max_length=30)
audio_file = models.FileField(
upload_to='imports/%Y/%m/%d', max_length=255, null=True, blank=True)
class Meta: class Meta:
ordering = ('id', ) ordering = ('id', )

View File

@ -1,3 +1,5 @@
from django.core.files.base import ContentFile
from funkwhale_api.taskapp import celery from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client from funkwhale_api.providers.acoustid import get_acoustid_client
@ -20,29 +22,72 @@ def set_acoustid_on_track_file(track_file):
return update(result['id']) return update(result['id'])
def _do_import(import_job, replace):
from_file = bool(import_job.audio_file)
mbid = import_job.mbid
acoustid_track_id = None
duration = None
track = None
if not mbid and from_file:
# we try to deduce mbid from acoustid
client = get_acoustid_client()
match = client.get_best_match(import_job.audio_file.path)
if not match:
raise ValueError('Cannot get match')
duration = match['recordings'][0]['duration']
mbid = match['recordings'][0]['id']
acoustid_track_id = match['id']
if mbid:
track, _ = models.Track.get_or_create_from_api(mbid=mbid)
else:
track = import_track_data_from_path(import_job.audio_file.path)
track_file = None
if replace:
track_file = track.files.first()
elif track.files.count() > 0:
if import_job.audio_file:
import_job.audio_file.delete()
import_job.status = 'skipped'
import_job.save()
return
track_file = track_file or models.TrackFile(
track=track, source=import_job.source)
track_file.acoustid_track_id = acoustid_track_id
if from_file:
track_file.audio_file = ContentFile(import_job.audio_file.read())
track_file.audio_file.name = import_job.audio_file.name
track_file.duration = duration
else:
track_file.download_file()
track_file.save()
import_job.status = 'finished'
import_job.track_file = track_file
if import_job.audio_file:
# it's imported on the track, we don't need it anymore
import_job.audio_file.delete()
import_job.save()
return track.pk
@celery.app.task(name='ImportJob.run', bind=True) @celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(models.ImportJob, 'import_job') @celery.require_instance(models.ImportJob, 'import_job')
def import_job_run(self, import_job, replace=False): def import_job_run(self, import_job, replace=False):
try: def mark_errored():
track, created = models.Track.get_or_create_from_api(mbid=import_job.mbid) import_job.status = 'errored'
track_file = None
if replace:
track_file = track.files.first()
elif track.files.count() > 0:
return
track_file = track_file or models.TrackFile(
track=track, source=import_job.source)
track_file.download_file()
track_file.save()
import_job.status = 'finished'
import_job.track_file = track_file
import_job.save() import_job.save()
return track.pk
try:
return _do_import(import_job, replace)
except Exception as exc: except Exception as exc:
if not settings.DEBUG: if not settings.DEBUG:
raise import_job_run.retry(args=[self], exc=exc, countdown=30, max_retries=3) try:
self.retry(exc=exc, countdown=30, max_retries=3)
except:
mark_errored()
raise
mark_errored()
raise raise

View File

@ -1,6 +1,10 @@
import glob import glob
import os
from django.core.files import File
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from funkwhale_api.providers.audiofile import tasks from funkwhale_api.music import tasks
from funkwhale_api.users.models import User
class Command(BaseCommand): class Command(BaseCommand):
@ -15,6 +19,11 @@ class Command(BaseCommand):
default=False, default=False,
help='Will match the pattern recursively (including subdirectories)', help='Will match the pattern recursively (including subdirectories)',
) )
parser.add_argument(
'--username',
dest='username',
help='The username of the user you want to be bound to the import',
)
parser.add_argument( parser.add_argument(
'--async', '--async',
action='store_true', action='store_true',
@ -46,6 +55,20 @@ class Command(BaseCommand):
if not matching: if not matching:
raise CommandError('No file matching pattern, aborting') raise CommandError('No file matching pattern, aborting')
user = None
if options['username']:
try:
user = User.objects.get(username=options['username'])
except User.DoesNotExist:
raise CommandError('Invalid username')
else:
# we bind the import to the first registered superuser
try:
user = User.objects.filter(is_superuser=True).order_by('pk').first()
assert user is not None
except AssertionError:
raise CommandError(
'No superuser available, please provide a --username')
if options['interactive']: if options['interactive']:
message = ( message = (
'Are you sure you want to do this?\n\n' 'Are you sure you want to do this?\n\n'
@ -54,18 +77,35 @@ class Command(BaseCommand):
if input(''.join(message)) != 'yes': if input(''.join(message)) != 'yes':
raise CommandError("Import cancelled.") raise CommandError("Import cancelled.")
message = 'Importing {}...' batch = self.do_import(matching, user=user, options=options)
if options['async']:
message = 'Launching import for {}...'
for path in matching:
self.stdout.write(message.format(path))
try:
tasks.from_path(path)
except Exception as e:
self.stdout.write('Error: {}'.format(e))
message = 'Successfully imported {} tracks' message = 'Successfully imported {} tracks'
if options['async']: if options['async']:
message = 'Successfully launched import for {} tracks' message = 'Successfully launched import for {} tracks'
self.stdout.write(message.format(len(matching))) self.stdout.write(message.format(len(matching)))
self.stdout.write(
"For details, please refer to import batch #".format(batch.pk))
def do_import(self, matching, user, options):
message = 'Importing {}...'
if options['async']:
message = 'Launching import for {}...'
# we create an import batch binded to the user
batch = user.imports.create(source='shell')
async = options['async']
handler = tasks.import_job_run.delay if async else tasks.import_job_run
for path in matching:
job = batch.jobs.create(
source='file://' + path,
)
name = os.path.basename(path)
with open(path, 'rb') as f:
job.audio_file.save(name, File(f))
job.save()
try:
handler(import_job_id=job.pk)
except Exception as e:
self.stdout.write('Error: {}'.format(e))
return batch

View File

@ -8,7 +8,7 @@ from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import models, metadata from funkwhale_api.music import models, metadata
def import_metadata_without_musicbrainz(path): def import_track_data_from_path(path):
data = metadata.Metadata(path) data = metadata.Metadata(path)
artist = models.Artist.objects.get_or_create( artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'), name__iexact=data.get('artist'),
@ -53,7 +53,7 @@ def from_path(path):
result = client.get_best_match(path) result = client.get_best_match(path)
acoustid_track_id = result['id'] acoustid_track_id = result['id']
except acoustid.WebServiceError: except acoustid.WebServiceError:
track = import_metadata_without_musicbrainz(path) track = import_track_data_from_path(path)
except (TypeError, KeyError): except (TypeError, KeyError):
track = import_metadata_without_musicbrainz(path) track = import_metadata_without_musicbrainz(path)
else: else:

View File

@ -1,7 +1,13 @@
from funkwhale_api.providers.acoustid import get_acoustid_client import os
import pytest
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import tasks from funkwhale_api.music import tasks
from . import data as api_data
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
def test_set_acoustid_on_track_file(factories, mocker): def test_set_acoustid_on_track_file(factories, mocker):
track_file = factories['music.TrackFile'](acoustid_track_id=None) track_file = factories['music.TrackFile'](acoustid_track_id=None)
@ -40,3 +46,108 @@ def test_set_acoustid_on_track_file_required_high_score(factories, mocker):
track_file.refresh_from_db() track_file.refresh_from_db()
assert track_file.acoustid_track_id is None assert track_file.acoustid_track_id is None
def test_import_job_can_run_with_file_and_acoustid(factories, mocker):
path = os.path.join(DATA_DIR, 'test.ogg')
mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
acoustid_payload = {
'results': [
{'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
'recordings': [
{
'duration': 268,
'id': mbid}],
'score': 0.860825}],
'status': 'ok'
}
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['adhesive_wombat'])
mocker.patch(
'funkwhale_api.musicbrainz.api.releases.get',
return_value=api_data.albums['get']['marsupial'])
mocker.patch(
'funkwhale_api.musicbrainz.api.recordings.search',
return_value=api_data.tracks['search']['8bitadventures'])
mocker.patch('acoustid.match', return_value=acoustid_payload)
job = factories['music.FileImportJob'](audio_file__path=path)
f = job.audio_file
tasks.import_job_run(import_job_id=job.pk)
job.refresh_from_db()
track_file = job.track_file
with open(path, 'rb') as f:
assert track_file.audio_file.read() == f.read()
assert track_file.duration == 268
# audio file is deleted from import job once persisted to audio file
assert not job.audio_file
assert job.status == 'finished'
assert job.source == 'file://'
def test_import_job_can_be_skipped(factories, mocker):
path = os.path.join(DATA_DIR, 'test.ogg')
mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
track_file = factories['music.TrackFile'](track__mbid=mbid)
acoustid_payload = {
'results': [
{'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
'recordings': [
{
'duration': 268,
'id': mbid}],
'score': 0.860825}],
'status': 'ok'
}
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['adhesive_wombat'])
mocker.patch(
'funkwhale_api.musicbrainz.api.releases.get',
return_value=api_data.albums['get']['marsupial'])
mocker.patch(
'funkwhale_api.musicbrainz.api.recordings.search',
return_value=api_data.tracks['search']['8bitadventures'])
mocker.patch('acoustid.match', return_value=acoustid_payload)
job = factories['music.FileImportJob'](audio_file__path=path)
f = job.audio_file
tasks.import_job_run(import_job_id=job.pk)
job.refresh_from_db()
assert job.track_file is None
# audio file is deleted from import job once persisted to audio file
assert not job.audio_file
assert job.status == 'skipped'
def test_import_job_can_be_errored(factories, mocker):
path = os.path.join(DATA_DIR, 'test.ogg')
mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
track_file = factories['music.TrackFile'](track__mbid=mbid)
acoustid_payload = {
'results': [
{'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
'recordings': [
{
'duration': 268,
'id': mbid}],
'score': 0.860825}],
'status': 'ok'
}
class MyException(Exception):
pass
mocker.patch('acoustid.match', side_effect=MyException())
job = factories['music.FileImportJob'](
audio_file__path=path, track_file=None)
with pytest.raises(MyException):
tasks.import_job_run(import_job_id=job.pk)
job.refresh_from_db()
assert job.track_file is None
assert job.status == 'errored'

View File

@ -1,8 +1,10 @@
import pytest
import acoustid import acoustid
import datetime import datetime
import os import os
from django.core.management import call_command
from django.core.management.base import CommandError
from .music import data as api_data
from funkwhale_api.providers.audiofile import tasks from funkwhale_api.providers.audiofile import tasks
DATA_DIR = os.path.join( DATA_DIR = os.path.join(
@ -11,35 +13,7 @@ DATA_DIR = os.path.join(
) )
def test_import_file_with_acoustid(db, mocker, preferences): def test_can_create_track_from_file_metadata(db, mocker):
mbid = api_data.tracks['get']['8bitadventures']['recording']['id']
payload = {
'results': [{
'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
'recordings': [{'id': mbid}],
'score': 0.86
}]
}
path = os.path.join(DATA_DIR, 'dummy_file.ogg')
m = mocker.patch('acoustid.match', return_value=payload)
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['adhesive_wombat'])
mocker.patch(
'funkwhale_api.musicbrainz.api.releases.get',
return_value=api_data.albums['get']['marsupial'])
mocker.patch(
'funkwhale_api.musicbrainz.api.recordings.get',
return_value=api_data.tracks['get']['8bitadventures'])
track_file = tasks.from_path(path)
result = payload['results'][0]
assert track_file.acoustid_track_id == result['id']
assert track_file.track.mbid == result['recordings'][0]['id']
m.assert_called_once_with('', path, parse=False)
def test_can_import_single_audio_file_without_acoustid(db, mocker):
mocker.patch('acoustid.match', side_effect=acoustid.WebServiceError('test')) mocker.patch('acoustid.match', side_effect=acoustid.WebServiceError('test'))
metadata = { metadata = {
'artist': ['Test artist'], 'artist': ['Test artist'],
@ -56,8 +30,8 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker):
'funkwhale_api.music.metadata.Metadata.get_file_type', 'funkwhale_api.music.metadata.Metadata.get_file_type',
return_value='OggVorbis', return_value='OggVorbis',
) )
track_file = tasks.from_path(os.path.join(DATA_DIR, 'dummy_file.ogg')) track = tasks.import_track_data_from_path(
track = track_file.track os.path.join(DATA_DIR, 'dummy_file.ogg'))
assert track.title == metadata['title'][0] assert track.title == metadata['title'][0]
assert track.mbid == metadata['musicbrainz_trackid'][0] assert track.mbid == metadata['musicbrainz_trackid'][0]
@ -67,3 +41,37 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker):
assert track.album.release_date == datetime.date(2012, 8, 15) assert track.album.release_date == datetime.date(2012, 8, 15)
assert track.artist.name == metadata['artist'][0] assert track.artist.name == metadata['artist'][0]
assert track.artist.mbid == metadata['musicbrainz_artistid'][0] assert track.artist.mbid == metadata['musicbrainz_artistid'][0]
def test_management_command_requires_a_valid_username(factories, mocker):
path = os.path.join(DATA_DIR, 'dummy_file.ogg')
user = factories['users.User'](username='me')
mocker.patch('funkwhale_api.providers.audiofile.management.commands.import_files.Command.do_import') # NOQA
with pytest.raises(CommandError):
call_command('import_files', path, username='not_me', interactive=False)
call_command('import_files', path, username='me', interactive=False)
def test_import_files_creates_a_batch_and_job(factories, mocker):
m = mocker.patch('funkwhale_api.music.tasks.import_job_run.delay')
user = factories['users.User'](username='me')
path = os.path.join(DATA_DIR, 'dummy_file.ogg')
call_command(
'import_files',
path,
username='me',
async=True,
interactive=False)
batch = user.imports.latest('id')
assert batch.source == 'shell'
assert batch.jobs.count() == 1
job = batch.jobs.first()
assert job.status == 'pending'
with open(path, 'rb') as f:
assert job.audio_file.read() == f.read()
assert job.source == 'file://' + path
m.assert_called_once_with(import_job_id=job.pk)

View File

@ -30,7 +30,7 @@ services:
links: links:
- postgres - postgres
- redis - redis
command: celery -A funkwhale_api.taskapp worker command: celery -A funkwhale_api.taskapp worker -l debug
environment: environment:
- "DJANGO_ALLOWED_HOSTS=localhost" - "DJANGO_ALLOWED_HOSTS=localhost"
- "DJANGO_SETTINGS_MODULE=config.settings.local" - "DJANGO_SETTINGS_MODULE=config.settings.local"