Improved CLI importer reliability and UX
This commit is contained in:
parent
6678c46d88
commit
ecb7c464a1
|
@ -47,7 +47,7 @@ class Command(BaseCommand):
|
||||||
self.stdout.write("Checking {} in-place imported files…".format(total))
|
self.stdout.write("Checking {} in-place imported files…".format(total))
|
||||||
|
|
||||||
missing = []
|
missing = []
|
||||||
for i, row in enumerate(candidates.values("id", "source")):
|
for i, row in enumerate(candidates.values("id", "source").iterator()):
|
||||||
path = row["source"].replace("file://", "")
|
path = row["source"].replace("file://", "")
|
||||||
progress(self.stdout, i + 1, total)
|
progress(self.stdout, i + 1, total)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
|
|
|
@ -1,13 +1,40 @@
|
||||||
import glob
|
import itertools
|
||||||
import os
|
import os
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import time
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.files import File
|
from django.core.files import File
|
||||||
from django.core.management.base import BaseCommand, CommandError
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from funkwhale_api.music import models, tasks
|
from funkwhale_api.music import models, tasks, utils
|
||||||
|
|
||||||
|
|
||||||
|
def crawl_dir(dir, extensions, recursive=True):
    """Lazily yield the paths of files found under ``dir``.

    :param dir: Path to crawl. If it points to a regular file, that path is
        yielded as-is (no extension filtering) and the crawl stops.
    :param extensions: Iterable of extensions, without the leading dot
        (e.g. ``["mp3", "ogg"]``). Matching is case-insensitive.
    :param recursive: When true, sub-directories are crawled as well.
    :returns: A generator of paths. Each matching file is yielded exactly
        once, even if its name matches several of the given extensions.
    """
    if os.path.isfile(dir):
        yield dir
        return

    # Build the suffixes once per directory; str.endswith accepts a tuple,
    # so a file matching several extensions is still reported only once.
    suffixes = tuple(".{}".format(e.lower()) for e in extensions)
    with os.scandir(dir) as scanner:
        for entry in scanner:
            if entry.is_file():
                if entry.name.lower().endswith(suffixes):
                    yield entry.path
            # NOTE(review): per the os.scandir docs, is_dir() follows
            # symlinks by default, so symlinked directories are crawled too.
            elif recursive and entry.is_dir():
                yield from crawl_dir(entry.path, extensions, recursive=recursive)
|
||||||
|
|
||||||
|
|
||||||
|
def batch(iterable, n=1):
    """Split ``iterable`` into successive lists of at most ``n`` items.

    Items are pulled lazily, so only one batch is held in memory at a time.

    :param iterable: Any iterable or iterator.
    :param n: Maximum number of items per batch, must be at least 1.
    :returns: A generator of non-empty lists. The last list may hold fewer
        than ``n`` items. Nothing is yielded for an empty input.
    :raises ValueError: If ``n`` is lower than 1 (raised on first iteration),
        since such a size could never consume the input.
    """
    if n < 1:
        raise ValueError("Batch size must be at least 1, got {}".format(n))

    # iter() lets callers pass plain sequences as well as iterators.
    iterator = iter(iterable)
    while True:
        current = list(itertools.islice(iterator, n))
        if not current:
            # Input exhausted: stop instead of emitting an empty batch.
            return
        yield current
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
|
@ -89,6 +116,7 @@ class Command(BaseCommand):
|
||||||
"of overhead on your server and on servers you are federating with."
|
"of overhead on your server and on servers you are federating with."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
parser.add_argument("-e", "--extension", nargs="+")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--broadcast",
|
"--broadcast",
|
||||||
|
@ -119,10 +147,17 @@ class Command(BaseCommand):
|
||||||
help="Do NOT prompt the user for input of any kind.",
|
help="Do NOT prompt the user for input of any kind.",
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
parser.add_argument(
|
||||||
glob_kwargs = {}
|
"--batch-size",
|
||||||
matching = []
|
"-s",
|
||||||
|
dest="batch_size",
|
||||||
|
default=1000,
|
||||||
|
type=int,
|
||||||
|
help="Size of each batch, only used when crawling large collections",
|
||||||
|
)
|
||||||
|
|
||||||
|
def handle(self, *args, **options):
|
||||||
|
self.is_confirmed = False
|
||||||
try:
|
try:
|
||||||
library = models.Library.objects.select_related("actor__user").get(
|
library = models.Library.objects.select_related("actor__user").get(
|
||||||
uuid__startswith=options["library_id"]
|
uuid__startswith=options["library_id"]
|
||||||
|
@ -133,14 +168,100 @@ class Command(BaseCommand):
|
||||||
if not library.actor.get_user():
|
if not library.actor.get_user():
|
||||||
raise CommandError("Library {} is not a local library".format(library.uuid))
|
raise CommandError("Library {} is not a local library".format(library.uuid))
|
||||||
|
|
||||||
if options["recursive"]:
|
if options["in_place"]:
|
||||||
glob_kwargs["recursive"] = True
|
self.stdout.write(
|
||||||
for import_path in options["path"]:
|
"Checking imported paths against settings.MUSIC_DIRECTORY_PATH"
|
||||||
matching += glob.glob(import_path, **glob_kwargs)
|
)
|
||||||
raw_matching = sorted(list(set(matching)))
|
|
||||||
|
|
||||||
|
for import_path in options["path"]:
|
||||||
|
p = settings.MUSIC_DIRECTORY_PATH
|
||||||
|
if not p:
|
||||||
|
raise CommandError(
|
||||||
|
"Importing in-place requires setting the "
|
||||||
|
"MUSIC_DIRECTORY_PATH variable"
|
||||||
|
)
|
||||||
|
if p and not import_path.startswith(p):
|
||||||
|
raise CommandError(
|
||||||
|
"Importing in-place only works if importing"
|
||||||
|
"from {} (MUSIC_DIRECTORY_PATH), as this directory"
|
||||||
|
"needs to be accessible by the webserver."
|
||||||
|
"Culprit: {}".format(p, import_path)
|
||||||
|
)
|
||||||
|
|
||||||
|
extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
|
||||||
|
crawler = itertools.chain(
|
||||||
|
*[
|
||||||
|
crawl_dir(p, extensions=extensions, recursive=options["recursive"])
|
||||||
|
for p in options["path"]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
errors = []
|
||||||
|
total = 0
|
||||||
|
start_time = time.time()
|
||||||
|
reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())
|
||||||
|
|
||||||
|
import_url = "{}://{}/content/libraries/{}/upload?{}"
|
||||||
|
import_url = import_url.format(
|
||||||
|
settings.FUNKWHALE_PROTOCOL,
|
||||||
|
settings.FUNKWHALE_HOSTNAME,
|
||||||
|
str(library.uuid),
|
||||||
|
urllib.parse.urlencode([("import", reference)]),
|
||||||
|
)
|
||||||
|
self.stdout.write(
|
||||||
|
"For details, please refer to import reference '{}' or URL {}".format(
|
||||||
|
reference, import_url
|
||||||
|
)
|
||||||
|
)
|
||||||
|
batch_start = None
|
||||||
|
batch_duration = None
|
||||||
|
for i, entries in enumerate(batch(crawler, options["batch_size"])):
|
||||||
|
total += len(entries)
|
||||||
|
batch_start = time.time()
|
||||||
|
time_stats = ""
|
||||||
|
if i > 0:
|
||||||
|
time_stats = " - running for {}s, previous batch took {}s".format(
|
||||||
|
int(time.time() - start_time), int(batch_duration),
|
||||||
|
)
|
||||||
|
if entries:
|
||||||
|
self.stdout.write(
|
||||||
|
"Handling batch {} ({} items){}".format(
|
||||||
|
i + 1, options["batch_size"], time_stats,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
batch_errors = self.handle_batch(
|
||||||
|
library=library,
|
||||||
|
paths=entries,
|
||||||
|
batch=i + 1,
|
||||||
|
reference=reference,
|
||||||
|
options=options,
|
||||||
|
)
|
||||||
|
if batch_errors:
|
||||||
|
errors += batch_errors
|
||||||
|
|
||||||
|
batch_duration = time.time() - batch_start
|
||||||
|
|
||||||
|
message = "Successfully imported {} tracks in {}s"
|
||||||
|
if options["async_"]:
|
||||||
|
message = "Successfully launched import for {} tracks in {}s"
|
||||||
|
|
||||||
|
self.stdout.write(
|
||||||
|
message.format(total - len(errors), int(time.time() - start_time))
|
||||||
|
)
|
||||||
|
if len(errors) > 0:
|
||||||
|
self.stderr.write("{} tracks could not be imported:".format(len(errors)))
|
||||||
|
|
||||||
|
for path, error in errors:
|
||||||
|
self.stderr.write("- {}: {}".format(path, error))
|
||||||
|
|
||||||
|
self.stdout.write(
|
||||||
|
"For details, please refer to import reference '{}' or URL {}".format(
|
||||||
|
reference, import_url
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def handle_batch(self, library, paths, batch, reference, options):
|
||||||
matching = []
|
matching = []
|
||||||
for m in raw_matching:
|
for m in paths:
|
||||||
# In some situations, the path is encoded incorrectly on the filesystem
|
# In some situations, the path is encoded incorrectly on the filesystem
|
||||||
# so we filter out faulty paths and display a warning to the user.
|
# so we filter out faulty paths and display a warning to the user.
|
||||||
# see https://dev.funkwhale.audio/funkwhale/funkwhale/issues/138
|
# see https://dev.funkwhale.audio/funkwhale/funkwhale/issues/138
|
||||||
|
@ -160,96 +281,57 @@ class Command(BaseCommand):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if options["in_place"]:
|
|
||||||
self.stdout.write(
|
|
||||||
"Checking imported paths against settings.MUSIC_DIRECTORY_PATH"
|
|
||||||
)
|
|
||||||
p = settings.MUSIC_DIRECTORY_PATH
|
|
||||||
if not p:
|
|
||||||
raise CommandError(
|
|
||||||
"Importing in-place requires setting the "
|
|
||||||
"MUSIC_DIRECTORY_PATH variable"
|
|
||||||
)
|
|
||||||
for m in matching:
|
|
||||||
if not m.startswith(p):
|
|
||||||
raise CommandError(
|
|
||||||
"Importing in-place only works if importing"
|
|
||||||
"from {} (MUSIC_DIRECTORY_PATH), as this directory"
|
|
||||||
"needs to be accessible by the webserver."
|
|
||||||
"Culprit: {}".format(p, m)
|
|
||||||
)
|
|
||||||
if not matching:
|
if not matching:
|
||||||
raise CommandError("No file matching pattern, aborting")
|
raise CommandError("No file matching pattern, aborting")
|
||||||
|
|
||||||
if options["replace"]:
|
if options["replace"]:
|
||||||
filtered = {"initial": matching, "skipped": [], "new": matching}
|
filtered = {"initial": matching, "skipped": [], "new": matching}
|
||||||
message = "- {} files to be replaced"
|
message = " - {} files to be replaced"
|
||||||
import_paths = matching
|
import_paths = matching
|
||||||
else:
|
else:
|
||||||
filtered = self.filter_matching(matching, library)
|
filtered = self.filter_matching(matching, library)
|
||||||
message = "- {} files already found in database"
|
message = " - {} files already found in database"
|
||||||
import_paths = filtered["new"]
|
import_paths = filtered["new"]
|
||||||
|
|
||||||
self.stdout.write("Import summary:")
|
self.stdout.write(" Import summary:")
|
||||||
self.stdout.write(
|
self.stdout.write(
|
||||||
"- {} files found matching this pattern: {}".format(
|
" - {} files found matching this pattern: {}".format(
|
||||||
len(matching), options["path"]
|
len(matching), options["path"]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.stdout.write(message.format(len(filtered["skipped"])))
|
self.stdout.write(message.format(len(filtered["skipped"])))
|
||||||
|
|
||||||
self.stdout.write("- {} new files".format(len(filtered["new"])))
|
self.stdout.write(" - {} new files".format(len(filtered["new"])))
|
||||||
|
|
||||||
self.stdout.write(
|
if batch == 1:
|
||||||
"Selected options: {}".format(
|
self.stdout.write(
|
||||||
", ".join(["in place" if options["in_place"] else "copy music files"])
|
" Selected options: {}".format(
|
||||||
|
", ".join(
|
||||||
|
["in place" if options["in_place"] else "copy music files"]
|
||||||
|
)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
if len(filtered["new"]) == 0:
|
if len(filtered["new"]) == 0:
|
||||||
self.stdout.write("Nothing new to import, exiting")
|
self.stdout.write(" Nothing new to import, exiting")
|
||||||
return
|
return
|
||||||
|
|
||||||
if options["interactive"]:
|
if options["interactive"] and not self.is_confirmed:
|
||||||
message = (
|
message = (
|
||||||
"Are you sure you want to do this?\n\n"
|
"Are you sure you want to do this?\n\n"
|
||||||
"Type 'yes' to continue, or 'no' to cancel: "
|
"Type 'yes' to continue, or 'no' to cancel: "
|
||||||
)
|
)
|
||||||
if input("".join(message)) != "yes":
|
if input("".join(message)) != "yes":
|
||||||
raise CommandError("Import cancelled.")
|
raise CommandError("Import cancelled.")
|
||||||
reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())
|
self.is_confirmed = True
|
||||||
|
|
||||||
import_url = "{}://{}/content/libraries/{}/upload?{}"
|
|
||||||
import_url = import_url.format(
|
|
||||||
settings.FUNKWHALE_PROTOCOL,
|
|
||||||
settings.FUNKWHALE_HOSTNAME,
|
|
||||||
str(library.uuid),
|
|
||||||
urllib.parse.urlencode([("import", reference)]),
|
|
||||||
)
|
|
||||||
self.stdout.write(
|
|
||||||
"For details, please refer to import reference '{}' or URL {}".format(
|
|
||||||
reference, import_url
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
errors = self.do_import(
|
errors = self.do_import(
|
||||||
import_paths, library=library, reference=reference, options=options
|
import_paths,
|
||||||
)
|
library=library,
|
||||||
message = "Successfully imported {} tracks"
|
reference=reference,
|
||||||
if options["async_"]:
|
batch=batch,
|
||||||
message = "Successfully launched import for {} tracks"
|
options=options,
|
||||||
|
|
||||||
self.stdout.write(message.format(len(import_paths)))
|
|
||||||
if len(errors) > 0:
|
|
||||||
self.stderr.write("{} tracks could not be imported:".format(len(errors)))
|
|
||||||
|
|
||||||
for path, error in errors:
|
|
||||||
self.stderr.write("- {}: {}".format(path, error))
|
|
||||||
|
|
||||||
self.stdout.write(
|
|
||||||
"For details, please refer to import reference '{}' or URL {}".format(
|
|
||||||
reference, import_url
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
return errors
|
||||||
|
|
||||||
def filter_matching(self, matching, library):
|
def filter_matching(self, matching, library):
|
||||||
sources = ["file://{}".format(p) for p in matching]
|
sources = ["file://{}".format(p) for p in matching]
|
||||||
|
@ -266,17 +348,20 @@ class Command(BaseCommand):
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def do_import(self, paths, library, reference, options):
|
def do_import(self, paths, library, reference, batch, options):
|
||||||
message = "{i}/{total} Importing {path}..."
|
message = "[batch {batch}] {i}/{total} Importing {path}..."
|
||||||
if options["async_"]:
|
if options["async_"]:
|
||||||
message = "{i}/{total} Launching import for {path}..."
|
message = "[batch {batch}] {i}/{total} Launching import for {path}..."
|
||||||
|
|
||||||
# we create an upload binded to the library
|
# we create an upload binded to the library
|
||||||
async_ = options["async_"]
|
async_ = options["async_"]
|
||||||
errors = []
|
errors = []
|
||||||
for i, path in list(enumerate(paths)):
|
for i, path in list(enumerate(paths)):
|
||||||
|
if options["verbosity"] > 1:
|
||||||
|
self.stdout.write(
|
||||||
|
message.format(batch=batch, path=path, i=i + 1, total=len(paths))
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
self.stdout.write(message.format(path=path, i=i + 1, total=len(paths)))
|
|
||||||
self.create_upload(
|
self.create_upload(
|
||||||
path,
|
path,
|
||||||
reference,
|
reference,
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
CLI Importer is now more reliable and less resource-hungry on large libraries
|
|
@ -5,3 +5,20 @@ Next release notes
|
||||||
|
|
||||||
Those release notes refer to the current development branch and are reset
|
Those release notes refer to the current development branch and are reset
|
||||||
after each release.
|
after each release.
|
||||||
|
|
||||||
|
More reliable CLI importer
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Our CLI importer is now more reliable and less prone to Out-of-Memory issues, especially when scanning large libraries (hundreds of GB or bigger).
|
||||||
|
|
||||||
|
We've also improved the directory crawling logic, so that you don't have to use glob patterns or specify extensions when importing.
|
||||||
|
|
||||||
|
This means you can replace scripts that look like this::
|
||||||
|
|
||||||
|
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/**/*.ogg" "/srv/funkwhale/data/music/**/*.mp3" --recursive --noinput
|
||||||
|
|
||||||
|
With this::
|
||||||
|
|
||||||
|
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput
|
||||||
|
|
||||||
|
And Funkwhale will happily import any supported audio file from the specified directory.
|
||||||
|
|
|
@ -15,7 +15,7 @@ You can import those tracks as follows, assuming they are located in
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
export LIBRARY_ID="<your_libary_id>"
|
export LIBRARY_ID="<your_libary_id>"
|
||||||
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/**/*.ogg" --recursive --noinput
|
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput
|
||||||
|
|
||||||
When you use docker, the ``/srv/funkwhale/data/music`` is mounted from the host
|
When you use docker, the ``/srv/funkwhale/data/music`` is mounted from the host
|
||||||
to the ``/music`` directory on the container:
|
to the ``/music`` directory on the container:
|
||||||
|
@ -23,14 +23,14 @@ to the ``/music`` directory on the container:
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
export LIBRARY_ID="<your_libary_id>"
|
export LIBRARY_ID="<your_libary_id>"
|
||||||
docker-compose run --rm api python manage.py import_files $LIBRARY_ID "/music/**/*.ogg" --recursive --noinput
|
docker-compose run --rm api python manage.py import_files $LIBRARY_ID "/music/" --recursive --noinput
|
||||||
|
|
||||||
When you installed Funkwhale via ansible, you need to call a script instead of Python, and the folder path must be adapted accordingly:
|
When you installed Funkwhale via ansible, you need to call a script instead of Python, and the folder path must be adapted accordingly:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
export LIBRARY_ID="<your_libary_id>"
|
export LIBRARY_ID="<your_libary_id>"
|
||||||
/srv/funkwhale/manage import_files $LIBRARY_ID "/srv/funkwhale/data/music/**/**/*.ogg" --recursive --noinput
|
/srv/funkwhale/manage import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
You'll have to create a library in the Web UI before to get your library ID. Simply visit
|
You'll have to create a library in the Web UI before to get your library ID. Simply visit
|
||||||
|
@ -107,9 +107,9 @@ you can create a symlink like this::
|
||||||
ln -s /media/mynfsshare /srv/funkwhale/data/music/nfsshare
|
ln -s /media/mynfsshare /srv/funkwhale/data/music/nfsshare
|
||||||
|
|
||||||
And import music from this share with this command::
|
And import music from this share with this command::
|
||||||
|
|
||||||
export LIBRARY_ID="<your_libary_id>"
|
export LIBRARY_ID="<your_libary_id>"
|
||||||
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/nfsshare/**/*.ogg" --recursive --noinput --in-place
|
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/nfsshare/" --recursive --noinput --in-place
|
||||||
|
|
||||||
On docker setups, it will require a bit more work, because while the ``/srv/funkwhale/data/music`` is mounted
|
On docker setups, it will require a bit more work, because while the ``/srv/funkwhale/data/music`` is mounted
|
||||||
in containers, symlinked directories are not.
|
in containers, symlinked directories are not.
|
||||||
|
|
Loading…
Reference in New Issue