Now use import job everywhere, even for direct file imports

2017-12-27 20:29:26 +01:00 · 2017-12-27 20:29:26 +01:00 · 2e616282fd
parent 5d2dbbc828
commit 2e616282fd
9 changed files with 318 additions and 66 deletions
--- a/api/funkwhale_api/music/factories.py
+++ b/api/funkwhale_api/music/factories.py
@ -72,6 +72,14 @@ class ImportJobFactory(factory.django.DjangoModelFactory):
        model = 'music.ImportJob'
@registry.register(name='music.FileImportJob')
 class FileImportJobFactory(ImportJobFactory):
    source = 'file://'
    mbid = None
    audio_file = factory.django.FileField(
        from_path=os.path.join(SAMPLES_PATH, 'test.ogg'))
@registry.register
 class WorkFactory(factory.django.DjangoModelFactory):
    mbid = factory.Faker('uuid4')
--- a/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py
+++ b/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py
@ -0,0 +1,28 @@
 # Generated by Django 2.0 on 2017-12-27 17:28
 from django.db import migrations, models
 class Migration(migrations.Migration):
    dependencies = [
        ('music', '0016_trackfile_acoustid_track_id'),
    ]
    operations = [
        migrations.AddField(
            model_name='importbatch',
            name='source',
            field=models.CharField(choices=[('api', 'api'), ('shell', 'shell')], default='api', max_length=30),
        ),
        migrations.AddField(
            model_name='importjob',
            name='audio_file',
            field=models.FileField(blank=True, max_length=255, null=True, upload_to='imports/%Y/%m/%d'),
        ),
        migrations.AlterField(
            model_name='importjob',
            name='mbid',
            field=models.UUIDField(blank=True, editable=False, null=True),
        ),
    ]
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@ -384,9 +384,17 @@ class TrackFile(models.Model):
 class ImportBatch(models.Model):
    IMPORT_BATCH_SOURCES = [
        ('api', 'api'),
        ('shell', 'shell')
    ]
    source = models.CharField(
        max_length=30, default='api', choices=IMPORT_BATCH_SOURCES)
    creation_date = models.DateTimeField(default=timezone.now)
    submitted_by = models.ForeignKey(
-        'users.User', related_name='imports', on_delete=models.CASCADE)
+        'users.User',
        related_name='imports',
        on_delete=models.CASCADE)
    class Meta:
        ordering = ['-creation_date']
@ -411,12 +419,16 @@ class ImportJob(models.Model):
        blank=True,
        on_delete=models.CASCADE)
    source = models.URLField()
-    mbid = models.UUIDField(editable=False)
+    mbid = models.UUIDField(editable=False, null=True, blank=True)
    STATUS_CHOICES = (
        ('pending', 'Pending'),
-        ('finished', 'finished'),
+        ('finished', 'Finished'),
        ('errored', 'Errored'),
        ('skipped', 'Skipped'),
    )
    status = models.CharField(choices=STATUS_CHOICES, default='pending', max_length=30)
    audio_file = models.FileField(
        upload_to='imports/%Y/%m/%d', max_length=255, null=True, blank=True)
    class Meta:
        ordering = ('id', )
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@ -1,3 +1,5 @@
 from django.core.files.base import ContentFile
 from funkwhale_api.taskapp import celery
 from funkwhale_api.providers.acoustid import get_acoustid_client
@ -20,29 +22,72 @@ def set_acoustid_on_track_file(track_file):
        return update(result['id'])
 def _do_import(import_job, replace):
    from_file = bool(import_job.audio_file)
    mbid = import_job.mbid
    acoustid_track_id = None
    duration = None
    track = None
    if not mbid and from_file:
        # we try to deduce mbid from acoustid
        client = get_acoustid_client()
        match = client.get_best_match(import_job.audio_file.path)
        if not match:
            raise ValueError('Cannot get match')
        duration = match['recordings'][0]['duration']
        mbid = match['recordings'][0]['id']
        acoustid_track_id = match['id']
    if mbid:
        track, _ = models.Track.get_or_create_from_api(mbid=mbid)
    else:
        track = import_track_data_from_path(import_job.audio_file.path)
    track_file = None
    if replace:
        track_file = track.files.first()
    elif track.files.count() > 0:
        if import_job.audio_file:
            import_job.audio_file.delete()
        import_job.status = 'skipped'
        import_job.save()
        return
    track_file = track_file or models.TrackFile(
        track=track, source=import_job.source)
    track_file.acoustid_track_id = acoustid_track_id
    if from_file:
        track_file.audio_file = ContentFile(import_job.audio_file.read())
        track_file.audio_file.name = import_job.audio_file.name
        track_file.duration = duration
    else:
        track_file.download_file()
    track_file.save()
    import_job.status = 'finished'
    import_job.track_file = track_file
    if import_job.audio_file:
        # it's imported on the track, we don't need it anymore
        import_job.audio_file.delete()
    import_job.save()
    return track.pk
@celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(models.ImportJob, 'import_job')
 def import_job_run(self, import_job, replace=False):
-    try:
+    def mark_errored():
-        track, created = models.Track.get_or_create_from_api(mbid=import_job.mbid)
+        import_job.status = 'errored'
        track_file = None
        if replace:
            track_file = track.files.first()
        elif track.files.count() > 0:
            return
        track_file = track_file or models.TrackFile(
            track=track, source=import_job.source)
        track_file.download_file()
        track_file.save()
        import_job.status = 'finished'
        import_job.track_file = track_file
        import_job.save()
        return track.pk
    try:
        return _do_import(import_job, replace)
    except Exception as exc:
        if not settings.DEBUG:
-            raise import_job_run.retry(args=[self], exc=exc, countdown=30, max_retries=3)
+            try:
                self.retry(exc=exc, countdown=30, max_retries=3)
            except:
                mark_errored()
                raise
        mark_errored()
        raise
--- a/api/funkwhale_api/providers/audiofile/management/commands/import_files.py
+++ b/api/funkwhale_api/providers/audiofile/management/commands/import_files.py
@ -1,6 +1,10 @@
 import glob
 import os
 from django.core.files import File
 from django.core.management.base import BaseCommand, CommandError
-from funkwhale_api.providers.audiofile import tasks
+from funkwhale_api.music import tasks
 from funkwhale_api.users.models import User
 class Command(BaseCommand):
@ -15,6 +19,11 @@ class Command(BaseCommand):
            default=False,
            help='Will match the pattern recursively (including subdirectories)',
        )
        parser.add_argument(
            '--username',
            dest='username',
            help='The username of the user you want to be bound to the import',
        )
        parser.add_argument(
            '--async',
            action='store_true',
@ -46,6 +55,20 @@ class Command(BaseCommand):
        if not matching:
            raise CommandError('No file matching pattern, aborting')
        user = None
        if options['username']:
            try:
                user = User.objects.get(username=options['username'])
            except User.DoesNotExist:
                raise CommandError('Invalid username')
        else:
            # we bind the import to the first registered superuser
            try:
                user = User.objects.filter(is_superuser=True).order_by('pk').first()
                assert user is not None
            except AssertionError:
                raise CommandError(
                    'No superuser available, please provide a --username')
        if options['interactive']:
            message = (
                'Are you sure you want to do this?\n\n'
@ -54,18 +77,35 @@ class Command(BaseCommand):
            if input(''.join(message)) != 'yes':
                raise CommandError("Import cancelled.")
-        message = 'Importing {}...'
+        batch = self.do_import(matching, user=user, options=options)
        if options['async']:
            message = 'Launching import for {}...'
        for path in matching:
            self.stdout.write(message.format(path))
            try:
                tasks.from_path(path)
            except Exception as e:
                self.stdout.write('Error: {}'.format(e))
        message = 'Successfully imported {} tracks'
        if options['async']:
            message = 'Successfully launched import for {} tracks'
        self.stdout.write(message.format(len(matching)))
        self.stdout.write(
            "For details, please refer to import batch #".format(batch.pk))
    def do_import(self, matching, user, options):
        message = 'Importing {}...'
        if options['async']:
            message = 'Launching import for {}...'
        # we create an import batch binded to the user
        batch = user.imports.create(source='shell')
        async = options['async']
        handler = tasks.import_job_run.delay if async else tasks.import_job_run
        for path in matching:
            job = batch.jobs.create(
                source='file://' + path,
            )
            name = os.path.basename(path)
            with open(path, 'rb') as f:
                job.audio_file.save(name, File(f))
            job.save()
            try:
                handler(import_job_id=job.pk)
            except Exception as e:
                self.stdout.write('Error: {}'.format(e))
        return batch
--- a/api/funkwhale_api/providers/audiofile/tasks.py
+++ b/api/funkwhale_api/providers/audiofile/tasks.py
@ -8,7 +8,7 @@ from funkwhale_api.providers.acoustid import get_acoustid_client
 from funkwhale_api.music import models, metadata
-def import_metadata_without_musicbrainz(path):
+def import_track_data_from_path(path):
    data = metadata.Metadata(path)
    artist = models.Artist.objects.get_or_create(
        name__iexact=data.get('artist'),
@ -53,7 +53,7 @@ def from_path(path):
        result = client.get_best_match(path)
        acoustid_track_id = result['id']
    except acoustid.WebServiceError:
-        track = import_metadata_without_musicbrainz(path)
+        track = import_track_data_from_path(path)
    except (TypeError, KeyError):
        track = import_metadata_without_musicbrainz(path)
    else:
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@ -1,7 +1,13 @@
-from funkwhale_api.providers.acoustid import get_acoustid_client
+import os
 import pytest
 from funkwhale_api.providers.acoustid import get_acoustid_client
 from funkwhale_api.music import tasks
 from . import data as api_data
 DATA_DIR = os.path.dirname(os.path.abspath(__file__))
 def test_set_acoustid_on_track_file(factories, mocker):
    track_file = factories['music.TrackFile'](acoustid_track_id=None)
@ -40,3 +46,108 @@ def test_set_acoustid_on_track_file_required_high_score(factories, mocker):
    track_file.refresh_from_db()
    assert track_file.acoustid_track_id is None
 def test_import_job_can_run_with_file_and_acoustid(factories, mocker):
    path = os.path.join(DATA_DIR, 'test.ogg')
    mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
    acoustid_payload = {
        'results': [
            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
             'recordings': [
                {
                 'duration': 268,
                 'id': mbid}],
            'score': 0.860825}],
        'status': 'ok'
    }
    mocker.patch(
        'funkwhale_api.musicbrainz.api.artists.get',
        return_value=api_data.artists['get']['adhesive_wombat'])
    mocker.patch(
        'funkwhale_api.musicbrainz.api.releases.get',
        return_value=api_data.albums['get']['marsupial'])
    mocker.patch(
        'funkwhale_api.musicbrainz.api.recordings.search',
        return_value=api_data.tracks['search']['8bitadventures'])
    mocker.patch('acoustid.match', return_value=acoustid_payload)
    job = factories['music.FileImportJob'](audio_file__path=path)
    f = job.audio_file
    tasks.import_job_run(import_job_id=job.pk)
    job.refresh_from_db()
    track_file = job.track_file
    with open(path, 'rb') as f:
        assert track_file.audio_file.read() == f.read()
    assert track_file.duration == 268
    # audio file is deleted from import job once persisted to audio file
    assert not job.audio_file
    assert job.status == 'finished'
    assert job.source == 'file://'
 def test_import_job_can_be_skipped(factories, mocker):
    path = os.path.join(DATA_DIR, 'test.ogg')
    mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
    track_file = factories['music.TrackFile'](track__mbid=mbid)
    acoustid_payload = {
        'results': [
            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
             'recordings': [
                {
                 'duration': 268,
                 'id': mbid}],
            'score': 0.860825}],
        'status': 'ok'
    }
    mocker.patch(
        'funkwhale_api.musicbrainz.api.artists.get',
        return_value=api_data.artists['get']['adhesive_wombat'])
    mocker.patch(
        'funkwhale_api.musicbrainz.api.releases.get',
        return_value=api_data.albums['get']['marsupial'])
    mocker.patch(
        'funkwhale_api.musicbrainz.api.recordings.search',
        return_value=api_data.tracks['search']['8bitadventures'])
    mocker.patch('acoustid.match', return_value=acoustid_payload)
    job = factories['music.FileImportJob'](audio_file__path=path)
    f = job.audio_file
    tasks.import_job_run(import_job_id=job.pk)
    job.refresh_from_db()
    assert job.track_file is None
    # audio file is deleted from import job once persisted to audio file
    assert not job.audio_file
    assert job.status == 'skipped'
 def test_import_job_can_be_errored(factories, mocker):
    path = os.path.join(DATA_DIR, 'test.ogg')
    mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
    track_file = factories['music.TrackFile'](track__mbid=mbid)
    acoustid_payload = {
        'results': [
            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
             'recordings': [
                {
                 'duration': 268,
                 'id': mbid}],
            'score': 0.860825}],
        'status': 'ok'
    }
    class MyException(Exception):
        pass
    mocker.patch('acoustid.match', side_effect=MyException())
    job = factories['music.FileImportJob'](
        audio_file__path=path, track_file=None)
    with pytest.raises(MyException):
        tasks.import_job_run(import_job_id=job.pk)
    job.refresh_from_db()
    assert job.track_file is None
    assert job.status == 'errored'
--- a/api/tests/test_import_audio_file.py
+++ b/api/tests/test_import_audio_file.py
@ -1,8 +1,10 @@
 import pytest
 import acoustid
 import datetime
 import os
 from django.core.management import call_command
 from django.core.management.base import CommandError
 from .music import data as api_data
 from funkwhale_api.providers.audiofile import tasks
 DATA_DIR = os.path.join(
@ -11,35 +13,7 @@ DATA_DIR = os.path.join(
 )
-def test_import_file_with_acoustid(db, mocker, preferences):
+def test_can_create_track_from_file_metadata(db, mocker):
    mbid = api_data.tracks['get']['8bitadventures']['recording']['id']
    payload = {
        'results': [{
            'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
            'recordings': [{'id': mbid}],
            'score': 0.86
        }]
    }
    path = os.path.join(DATA_DIR, 'dummy_file.ogg')
    m = mocker.patch('acoustid.match', return_value=payload)
    mocker.patch(
        'funkwhale_api.musicbrainz.api.artists.get',
        return_value=api_data.artists['get']['adhesive_wombat'])
    mocker.patch(
        'funkwhale_api.musicbrainz.api.releases.get',
        return_value=api_data.albums['get']['marsupial'])
    mocker.patch(
        'funkwhale_api.musicbrainz.api.recordings.get',
        return_value=api_data.tracks['get']['8bitadventures'])
    track_file = tasks.from_path(path)
    result = payload['results'][0]
    assert track_file.acoustid_track_id == result['id']
    assert track_file.track.mbid == result['recordings'][0]['id']
    m.assert_called_once_with('', path, parse=False)
 def test_can_import_single_audio_file_without_acoustid(db, mocker):
    mocker.patch('acoustid.match', side_effect=acoustid.WebServiceError('test'))
    metadata = {
        'artist': ['Test artist'],
@ -56,8 +30,8 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker):
        'funkwhale_api.music.metadata.Metadata.get_file_type',
        return_value='OggVorbis',
    )
-    track_file = tasks.from_path(os.path.join(DATA_DIR, 'dummy_file.ogg'))
+    track = tasks.import_track_data_from_path(
-    track = track_file.track
+        os.path.join(DATA_DIR, 'dummy_file.ogg'))
    assert track.title == metadata['title'][0]
    assert track.mbid == metadata['musicbrainz_trackid'][0]
@ -67,3 +41,37 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker):
    assert track.album.release_date == datetime.date(2012, 8, 15)
    assert track.artist.name == metadata['artist'][0]
    assert track.artist.mbid == metadata['musicbrainz_artistid'][0]
 def test_management_command_requires_a_valid_username(factories, mocker):
    path = os.path.join(DATA_DIR, 'dummy_file.ogg')
    user = factories['users.User'](username='me')
    mocker.patch('funkwhale_api.providers.audiofile.management.commands.import_files.Command.do_import')  # NOQA
    with pytest.raises(CommandError):
        call_command('import_files', path, username='not_me', interactive=False)
    call_command('import_files', path, username='me', interactive=False)
 def test_import_files_creates_a_batch_and_job(factories, mocker):
    m = mocker.patch('funkwhale_api.music.tasks.import_job_run.delay')
    user = factories['users.User'](username='me')
    path = os.path.join(DATA_DIR, 'dummy_file.ogg')
    call_command(
        'import_files',
        path,
        username='me',
        async=True,
        interactive=False)
    batch = user.imports.latest('id')
    assert batch.source == 'shell'
    assert batch.jobs.count() == 1
    job = batch.jobs.first()
    assert job.status == 'pending'
    with open(path, 'rb') as f:
        assert job.audio_file.read() == f.read()
    assert job.source == 'file://' + path
    m.assert_called_once_with(import_job_id=job.pk)
--- a/dev.yml
+++ b/dev.yml
@ -30,7 +30,7 @@ services:
    links:
     - postgres
     - redis
-    command: celery -A funkwhale_api.taskapp worker
+    command: celery -A funkwhale_api.taskapp worker -l debug
    environment:
      - "DJANGO_ALLOWED_HOSTS=localhost"
      - "DJANGO_SETTINGS_MODULE=config.settings.local"