Merge branch '153-in-place-import' into 'develop'

Resolve "Allow in-place import"

Closes #153

See merge request funkwhale/funkwhale!150
This commit is contained in:
Eliot Berriot 2018-04-21 17:37:03 +00:00
commit 21d235a630
18 changed files with 318 additions and 31 deletions

View File

@ -9,3 +9,4 @@ FUNKWHALE_HOSTNAME=localhost
FUNKWHALE_PROTOCOL=http
PYTHONDONTWRITEBYTECODE=true
WEBPACK_DEVSERVER_PORT=8080
MUSIC_DIRECTORY_PATH=/music

View File

@ -3,6 +3,52 @@ Changelog
.. towncrier
0.10 (Unreleased)
-----------------
In-place import
^^^^^^^^^^^^^^^
This release includes in-place imports for the CLI import. This means you can
load gigabytes of music into funkwhale without worrying about about Funkwhale
copying those music files in its internal storage and eating your disk space.
This new feature is documented <here> and require additional configuration
to ensure funkwhale and your webserver can serve those files properly.
**Non-docker users:**
Assuming your music is stored in ``/srv/funkwhale/data/music``, add the following
block to your nginx configuration::
location /_protected/music {
internal;
alias /srv/funkwhale/data/music;
}
And the following to your .env file::
MUSIC_DIRECTORY_PATH=/srv/funkwhale/data/music
**Docker users:**
Assuming your music is stored in ``/srv/funkwhale/data/music``, add the following
block to your nginx configuration::
location /_protected/music {
internal;
alias /srv/funkwhale/data/music;
}
Assuming you have the following volume directive in your ``docker-compose.yml``
(it's the default): ``/srv/funkwhale/data/music:/music:ro``, then add
the following to your .env file::
MUSIC_DIRECTORY_PATH=/music
MUSIC_DIRECTORY_SERVE_PATH=/srv/funkwhale/data/music
0.9.1 (2018-04-17)
------------------

View File

@ -441,3 +441,9 @@ EXTERNAL_REQUESTS_VERIFY_SSL = env.bool(
'EXTERNAL_REQUESTS_VERIFY_SSL',
default=True
)
MUSIC_DIRECTORY_PATH = env('MUSIC_DIRECTORY_PATH', default=None)
# on Docker setup, the music directory may not match the host path,
# and we need to know it for it to serve stuff properly
MUSIC_DIRECTORY_SERVE_PATH = env(
'MUSIC_DIRECTORY_SERVE_PATH', default=MUSIC_DIRECTORY_PATH)

View File

@ -43,6 +43,7 @@ class TrackFactory(factory.django.DjangoModelFactory):
artist = factory.SelfAttribute('album.artist')
position = 1
tags = ManyToManyFromList('tags')
class Meta:
model = 'music.Track'
@ -57,6 +58,9 @@ class TrackFileFactory(factory.django.DjangoModelFactory):
model = 'music.TrackFile'
class Params:
in_place = factory.Trait(
audio_file=None,
)
federation = factory.Trait(
audio_file=None,
library_track=factory.SubFactory(LibraryTrackFactory),
@ -105,6 +109,10 @@ class ImportJobFactory(factory.django.DjangoModelFactory):
status='finished',
track_file=factory.SubFactory(TrackFileFactory),
)
in_place = factory.Trait(
status='finished',
audio_file=None,
)
@registry.register(name='music.FileImportJob')

View File

@ -463,6 +463,26 @@ class TrackFile(models.Model):
self.mimetype = utils.guess_mimetype(self.audio_file)
return super().save(**kwargs)
@property
def serve_from_source_path(self):
if not self.source or not self.source.startswith('file://'):
raise ValueError('Cannot serve this file from source')
serve_path = settings.MUSIC_DIRECTORY_SERVE_PATH
prefix = settings.MUSIC_DIRECTORY_PATH
if not serve_path or not prefix:
raise ValueError(
'You need to specify MUSIC_DIRECTORY_SERVE_PATH and '
'MUSIC_DIRECTORY_PATH to serve in-place imported files'
)
file_path = self.source.replace('file://', '', 1)
parts = os.path.split(file_path.replace(prefix, '', 1))
if parts[0] == '/':
parts = parts[1:]
return os.path.join(
serve_path,
*parts
)
IMPORT_STATUS_CHOICES = (
('pending', 'Pending'),

View File

@ -71,7 +71,7 @@ def import_track_from_remote(library_track):
library_track.title, artist=artist, album=album)
def _do_import(import_job, replace, use_acoustid=True):
def _do_import(import_job, replace=False, use_acoustid=True):
from_file = bool(import_job.audio_file)
mbid = import_job.mbid
acoustid_track_id = None
@ -93,6 +93,9 @@ def _do_import(import_job, replace, use_acoustid=True):
track = import_track_data_from_path(import_job.audio_file.path)
elif import_job.library_track:
track = import_track_from_remote(import_job.library_track)
elif import_job.source.startswith('file://'):
track = import_track_data_from_path(
import_job.source.replace('file://', '', 1))
else:
raise ValueError(
'Not enough data to process import, '
@ -123,7 +126,7 @@ def _do_import(import_job, replace, use_acoustid=True):
else:
# no downloading, we hotlink
pass
else:
elif import_job.audio_file:
track_file.download_file()
track_file.save()
import_job.status = 'finished'
@ -133,7 +136,7 @@ def _do_import(import_job, replace, use_acoustid=True):
import_job.audio_file.delete()
import_job.save()
return track.pk
return track_file
@celery.app.task(name='ImportJob.run', bind=True)
@ -147,7 +150,8 @@ def import_job_run(self, import_job, replace=False, use_acoustid=True):
import_job.save(update_fields=['status'])
try:
return _do_import(import_job, replace, use_acoustid=use_acoustid)
tf = _do_import(import_job, replace, use_acoustid=use_acoustid)
return tf.pk if tf else None
except Exception as exc:
if not settings.DEBUG:
try:

View File

@ -224,12 +224,21 @@ class TrackFileViewSet(viewsets.ReadOnlyModelViewSet):
library_track = qs.get(pk=library_track.pk)
library_track.download_audio()
audio_file = library_track.audio_file
file_path = '{}{}'.format(
settings.PROTECT_FILES_PATH,
audio_file.url)
mt = library_track.audio_mimetype
elif audio_file:
file_path = '{}{}'.format(
settings.PROTECT_FILES_PATH,
audio_file.url)
elif f.source and f.source.startswith('file://'):
file_path = '{}{}'.format(
settings.PROTECT_FILES_PATH + '/music',
f.serve_from_source_path)
response = Response()
filename = f.filename
response['X-Accel-Redirect'] = "{}{}".format(
settings.PROTECT_FILES_PATH,
audio_file.url)
response['X-Accel-Redirect'] = file_path
filename = "filename*=UTF-8''{}".format(
urllib.parse.quote(filename))
response["Content-Disposition"] = "attachment; {}".format(filename)

View File

@ -1,6 +1,7 @@
import glob
import os
from django.conf import settings
from django.core.files import File
from django.core.management.base import BaseCommand, CommandError
@ -38,7 +39,20 @@ class Command(BaseCommand):
action='store_true',
dest='exit_on_failure',
default=False,
help='use this flag to disable error catching',
help='Use this flag to disable error catching',
)
parser.add_argument(
'--in-place', '-i',
action='store_true',
dest='in_place',
default=False,
help=(
'Import files without duplicating them into the media directory.'
'For in-place import to work, the music files must be readable'
'by the web-server and funkwhale api and celeryworker processes.'
'You may want to use this if you have a big music library to '
'import and not much disk space available.'
)
)
parser.add_argument(
'--no-acoustid',
@ -53,10 +67,6 @@ class Command(BaseCommand):
)
def handle(self, *args, **options):
# self.stdout.write(self.style.SUCCESS('Successfully closed poll "%s"' % poll_id))
# Recursive is supported only on Python 3.5+, so we pass the option
# only if it's True to avoid breaking on older versions of Python
glob_kwargs = {}
if options['recursive']:
glob_kwargs['recursive'] = True
@ -65,6 +75,21 @@ class Command(BaseCommand):
except TypeError:
raise Exception('You need Python 3.5 to use the --recursive flag')
if options['in_place']:
self.stdout.write(
'Checking imported paths against settings.MUSIC_DIRECTORY_PATH')
p = settings.MUSIC_DIRECTORY_PATH
if not p:
raise CommandError(
'Importing in-place requires setting the '
'MUSIC_DIRECTORY_PATH variable')
for m in matching:
if not m.startswith(p):
raise CommandError(
'Importing in-place only works if importing'
'from {} (MUSIC_DIRECTORY_PATH), as this directory'
'needs to be accessible by the webserver.'
'Culprit: {}'.format(p, m))
if not matching:
raise CommandError('No file matching pattern, aborting')
@ -92,6 +117,10 @@ class Command(BaseCommand):
self.stdout.write('- {} new files'.format(
len(filtered['new'])))
self.stdout.write('Selected options: {}'.format(', '.join([
'no acoustid' if options['no_acoustid'] else 'use acoustid',
'in place' if options['in_place'] else 'copy music files',
])))
if len(filtered['new']) == 0:
self.stdout.write('Nothing new to import, exiting')
return
@ -164,11 +193,12 @@ class Command(BaseCommand):
job = batch.jobs.create(
source='file://' + path,
)
name = os.path.basename(path)
with open(path, 'rb') as f:
job.audio_file.save(name, File(f))
if not options['in_place']:
name = os.path.basename(path)
with open(path, 'rb') as f:
job.audio_file.save(name, File(f))
job.save()
job.save()
import_handler(
import_job_id=job.pk,
use_acoustid=not options['no_acoustid'])

View File

@ -231,3 +231,15 @@ def test_import_batch_notifies_followers(
on_behalf_of=library_actor,
to=[f1.actor.url]
)
def test__do_import_in_place_mbid(factories, tmpfile):
path = '/test.ogg'
job = factories['music.ImportJob'](
in_place=True, source='file:///test.ogg')
track = factories['music.Track'](mbid=job.mbid)
tf = tasks._do_import(job, use_acoustid=False)
assert bool(tf.audio_file) is False
assert tf.source == 'file:///test.ogg'

View File

@ -93,6 +93,25 @@ def test_can_proxy_remote_track(
assert library_track.audio_file.read() == b'test'
def test_can_serve_in_place_imported_file(
factories, settings, api_client, r_mock):
settings.PROTECT_AUDIO_FILES = False
settings.MUSIC_DIRECTORY_SERVE_PATH = '/host/music'
settings.MUSIC_DIRECTORY_PATH = '/music'
settings.MUSIC_DIRECTORY_PATH = '/music'
track_file = factories['music.TrackFile'](
in_place=True,
source='file:///music/test.ogg')
response = api_client.get(track_file.path)
assert response.status_code == 200
assert response['X-Accel-Redirect'] == '{}{}'.format(
settings.PROTECT_FILES_PATH,
'/music/host/music/test.ogg'
)
def test_can_create_import_from_federation_tracks(
factories, superuser_api_client, mocker):
lts = factories['federation.LibraryTrack'].create_batch(size=5)

View File

@ -58,6 +58,20 @@ def test_management_command_requires_a_valid_username(factories, mocker):
call_command('import_files', path, username='me', interactive=False)
def test_in_place_import_only_from_music_dir(factories, settings):
user = factories['users.User'](username='me')
settings.MUSIC_DIRECTORY_PATH = '/nope'
path = os.path.join(DATA_DIR, 'dummy_file.ogg')
with pytest.raises(CommandError):
call_command(
'import_files',
path,
in_place=True,
username='me',
interactive=False
)
def test_import_files_creates_a_batch_and_job(factories, mocker):
m = mocker.patch('funkwhale_api.music.tasks.import_job_run')
user = factories['users.User'](username='me')
@ -137,6 +151,27 @@ def test_import_files_works_with_utf8_file_name(factories, mocker):
use_acoustid=False)
def test_import_files_in_place(factories, mocker, settings):
settings.MUSIC_DIRECTORY_PATH = DATA_DIR
m = mocker.patch('funkwhale_api.music.tasks.import_job_run')
user = factories['users.User'](username='me')
path = os.path.join(DATA_DIR, 'utf8-éà◌.ogg')
call_command(
'import_files',
path,
username='me',
async=False,
in_place=True,
no_acoustid=True,
interactive=False)
batch = user.imports.latest('id')
job = batch.jobs.first()
assert bool(job.audio_file) is False
m.assert_called_once_with(
import_job_id=job.pk,
use_acoustid=False)
def test_storage_rename_utf_8_files(factories):
tf = factories['music.TrackFile'](audio_file__filename='été.ogg')
assert tf.audio_file.name.endswith('ete.ogg')

View File

@ -0,0 +1 @@
Can now import files in-place from the CLI importe (#155)

View File

@ -1,17 +1,22 @@
# If you have any doubts about what a setting does,
# check https://docs.funkwhale.audio/configuration.html#configuration-reference
# If you're tweaking this file from the template, ensure you edit at least the
# following variables:
# - DJANGO_SECRET_KEY
# - DJANGO_ALLOWED_HOSTS
# - FUNKWHALE_URL
# Additionaly, on non-docker setup **only**, you'll also have to tweak/uncomment those variables:
# On non-docker setup **only**, you'll also have to tweak/uncomment those variables:
# - DATABASE_URL
# - CACHE_URL
# - STATIC_ROOT
# - MEDIA_ROOT
#
# You **don't** need to update those variables on pure docker setups.
#
# Additional options you may want to check:
# - MUSIC_DIRECTORY_PATH and MUSIC_DIRECTORY_SERVE_PATH if you plan to use
# in-place import
# Docker only
# -----------
@ -19,7 +24,9 @@
# (it will be interpolated in docker-compose file)
# You can comment or ignore this if you're not using docker
FUNKWHALE_VERSION=latest
MUSIC_DIRECTORY_PATH=/music
# End of Docker-only configuration
# General configuration
# ---------------------
@ -34,6 +41,7 @@ FUNKWHALE_API_PORT=5000
# your instance
FUNKWHALE_URL=https://yourdomain.funwhale
# API/Django configuration
# Database configuration
@ -94,3 +102,9 @@ FEDERATION_ENABLED=True
# means anyone can subscribe to your library and import your file,
# use with caution.
FEDERATION_MUSIC_NEEDS_APPROVAL=True
# In-place import settings
# You can safely leave those settings uncommented if you don't plan to use
# in place imports.
# MUSIC_DIRECTORY_PATH=
# MUSIC_DIRECTORY_SERVE_PATH=

View File

@ -84,6 +84,14 @@ server {
alias /srv/funkwhale/data/media;
}
location /_protected/music {
# this is an internal location that is used to serve
# audio files once correct permission / authentication
# has been checked on API side
internal;
alias /srv/funkwhale/data/music;
}
# Transcoding logic and caching
location = /transcode-auth {
include /etc/nginx/funkwhale_proxy.conf;

View File

@ -65,7 +65,7 @@ services:
- "CACHE_URL=redis://redis:6379/0"
volumes:
- ./api:/app
- "${MUSIC_DIRECTORY-./data/music}:/music"
- "${MUSIC_DIRECTORY-./data/music}:/music:ro"
networks:
- internal
api:
@ -78,7 +78,7 @@ services:
command: python /app/manage.py runserver 0.0.0.0:12081
volumes:
- ./api:/app
- "${MUSIC_DIRECTORY-./data/music}:/music"
- "${MUSIC_DIRECTORY-./data/music}:/music:ro"
environment:
- "FUNKWHALE_HOSTNAME=${FUNKWHALE_HOSTNAME-localhost}"
- "FUNKWHALE_HOSTNAME_SUFFIX=funkwhale.test"
@ -107,6 +107,7 @@ services:
volumes:
- ./docker/nginx/conf.dev:/etc/nginx/nginx.conf
- ./docker/nginx/entrypoint.sh:/entrypoint.sh:ro
- "${MUSIC_DIRECTORY-./data/music}:/music:ro"
- ./deploy/funkwhale_proxy.conf:/etc/nginx/funkwhale_proxy.conf.template:ro
- ./api/funkwhale_api/media:/protected/media
ports:

View File

@ -42,6 +42,10 @@ http {
internal;
alias /protected/media;
}
location /_protected/music {
internal;
alias /music;
}
location = /transcode-auth {
# needed so we can authenticate transcode requests, but still
# cache the result

View File

@ -33,3 +33,44 @@ The URL should be ``/api/admin/dynamic_preferences/globalpreferencemodel/`` (pre
If you plan to use acoustid and external imports
(e.g. with the youtube backends), you should edit the corresponding
settings in this interface.
Configuration reference
-----------------------
.. _setting-MUSIC_DIRECTORY_PATH:
``MUSIC_DIRECTORY_PATH``
^^^^^^^^^^^^^^^^^^^^^^^^
Default: ``None``
The path on your server where Funwkhale can import files using :ref:`in-place import
<in-place-import>`. It must be readable by the webserver and funkwhale
api and worker processes.
On docker installations, we recommend you use the default of ``/music``
for this value. For non-docker installation, you can use any absolute path.
``/srv/funkwhale/data/music`` is a safe choice if you don't know what to use.
.. note:: This path should not include any trailing slash
.. _setting-MUSIC_DIRECTORY_SERVE_PATH:
``MUSIC_DIRECTORY_SERVE_PATH``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Default: :ref:`setting-MUSIC_DIRECTORY_PATH`
When using Docker, the value of :ref:`MUSIC_DIRECTORY_PATH` in your containers
may differ from the real path on your host. Assuming you have the following directive
in your :file:`docker-compose.yml` file::
volumes:
- /srv/funkwhale/data/music:/music:ro
Then, the value of :ref:`setting-MUSIC_DIRECTORY_SERVE_PATH` should be
``/srv/funkwhale/data``. This must be readable by the webserver.
On non-docker setup, you don't need to configure this setting.
.. note:: This path should not include any trailing slash

View File

@ -22,8 +22,15 @@ to the ``/music`` directory on the container:
docker-compose run --rm api python manage.py import_files "/music/**/*.ogg" --recursive --noinput
For the best results, we recommand tagging your music collection through
`Picard <http://picard.musicbrainz.org/>`_ in order to have the best quality metadata.
The import command supports several options, and you can check the help to
get details::
docker-compose run --rm api python manage.py import_files --help
.. note::
For the best results, we recommand tagging your music collection through
`Picard <http://picard.musicbrainz.org/>`_ in order to have the best quality metadata.
.. note::
@ -39,18 +46,39 @@ For the best results, we recommand tagging your music collection through
At the moment, only OGG/Vorbis and MP3 files with ID3 tags are supported
.. note::
The --recursive flag will work only on Python 3.5+, which is the default
version When using Docker or Debian 9. If you use an older version of Python,
remove the --recursive flag and use more explicit import patterns instead::
.. _in-place-import:
# this will only import ogg files at the second level
"/srv/funkwhale/data/music/*/*.ogg"
# this will only import ogg files in the fiven directory
"/srv/funkwhale/data/music/System-of-a-down/*.ogg"
In-place import
^^^^^^^^^^^^^^^
By default, the CLI-importer will copy imported files to Funkwhale's internal
storage. This means importing a 1Gb library will result in the same amount
of space being used by Funkwhale.
While this behaviour has some benefits (easier backups and configuration),
it's not always the best choice, especially if you have a huge library
to import and don't want to double your disk usage.
The CLI importer supports an additional ``--in-place`` option that triggers the
following behaviour during import:
1. Imported files are not store in funkwhale anymore
2. Instead, Funkwhale will store the file path and use it to serve the music
Because those files are not managed by Funkwhale, we offer additional
configuration options to ensure the webserver can serve them properly:
- :ref:`setting-MUSIC_DIRECTORY_PATH`
- :ref:`setting-MUSIC_DIRECTORY_SERVE_PATH`
.. warning::
While in-place import is faster and less disk-space-hungry, it's also
more fragile: if, for some reason, you move or rename the source files,
Funkwhale will not be able to serve those files anymore.
Thus, be especially careful when you manipulate the source files.
Getting demo tracks
^^^^^^^^^^^^^^^^^^^