Merge branch 'watch-imported-files' into 'develop'

Importer updates: watch directories, handle metadata updates

Closes #721 and #741

See merge request funkwhale/funkwhale!1111
Commit: d32a9ccbfe
@@ -1309,3 +1309,6 @@ IGNORE_FORWARDED_HOST_AND_PROTO = env.bool(
 """
 Use :attr:`FUNKWHALE_HOSTNAME` and :attr:`FUNKWHALE_PROTOCOL` instead of request header.
 """
+
+HASHING_ALGORITHM = "sha256"
+HASHING_CHUNK_SIZE = 1024 * 100
@@ -1,4 +1,5 @@
 import datetime
+import hashlib

 from django.core.files.base import ContentFile
 from django.http import request
@@ -458,3 +459,19 @@ def monkey_patch_request_build_absolute_uri():

     request.HttpRequest.scheme = property(scheme)
     request.HttpRequest.get_host = get_host
+
+
+def get_file_hash(file, algo=None, chunk_size=None, full_read=False):
+    algo = algo or settings.HASHING_ALGORITHM
+    chunk_size = chunk_size or settings.HASHING_CHUNK_SIZE
+    handler = getattr(hashlib, algo)
+    hash = handler()
+    file.seek(0)
+    if full_read:
+        for byte_block in iter(lambda: file.read(chunk_size), b""):
+            hash.update(byte_block)
+    else:
+        # sometimes, it's useful to only hash the beginning of the file, e.g.
+        # to avoid a lot of I/O when crawling large libraries
+        hash.update(file.read(chunk_size))
+    return "{}:{}".format(algo, hash.hexdigest())
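A quick usage sketch of the new helper (hypothetical file path, and assuming a configured Django environment so ``settings.HASHING_ALGORITHM`` resolves)::

    from funkwhale_api.common import utils as common_utils

    # Hypothetical local file. By default only the first HASHING_CHUNK_SIZE
    # bytes (100 KiB) are hashed, which keeps I/O low while crawling large
    # libraries; full_read=True hashes the entire file.
    with open("/srv/funkwhale/data/music/track.mp3", "rb") as f:
        partial = common_utils.get_file_hash(f)  # e.g. "sha256:..."
        complete = common_utils.get_file_hash(f, full_read=True)

One trade-off worth noting: hashing only the first chunk can miss edits that only touch the end of a file (ID3v1 tags live there, for instance), so the partial hash is a cheap heuristic rather than a guarantee.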
@@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand
 from django.db import transaction
 from django.db.models import Q

+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music import models, utils
@@ -17,9 +18,9 @@ class Command(BaseCommand):
             help="Do not execute anything",
         )
         parser.add_argument(
-            "--mimetypes",
+            "--mimetype",
             action="store_true",
-            dest="mimetypes",
+            dest="mimetype",
             default=True,
             help="Check and fix mimetypes",
         )
@@ -37,16 +38,33 @@ class Command(BaseCommand):
             default=False,
             help="Check and fix file size, can be really slow because it needs to access files",
         )
+        parser.add_argument(
+            "--checksum",
+            action="store_true",
+            dest="checksum",
+            default=False,
+            help="Check and fix file checksums, can be really slow because it needs to access files",
+        )
+        parser.add_argument(
+            "--batch-size",
+            "-s",
+            dest="batch_size",
+            default=1000,
+            type=int,
+            help="Size of each updated batch",
+        )

     def handle(self, *args, **options):
         if options["dry_run"]:
             self.stdout.write("Dry-run on, will not commit anything")
-        if options["mimetypes"]:
+        if options["mimetype"]:
             self.fix_mimetypes(**options)
         if options["data"]:
             self.fix_file_data(**options)
         if options["size"]:
             self.fix_file_size(**options)
+        if options["checksum"]:
+            self.fix_file_checksum(**options)

     @transaction.atomic
     def fix_mimetypes(self, dry_run, **kwargs):
@@ -54,11 +72,12 @@ class Command(BaseCommand):
         matching = models.Upload.objects.filter(
             Q(source__startswith="file://") | Q(source__startswith="upload://")
         ).exclude(mimetype__startswith="audio/")
+        total = matching.count()
         self.stdout.write(
-            "[mimetypes] {} entries found with bad or no mimetype".format(
-                matching.count()
-            )
+            "[mimetypes] {} entries found with bad or no mimetype".format(total)
         )
+        if not total:
+            return
         for extension, mimetype in utils.EXTENSION_TO_MIMETYPE.items():
             qs = matching.filter(source__endswith=".{}".format(extension))
             self.stdout.write(
@@ -81,24 +100,36 @@ class Command(BaseCommand):
         )
         if dry_run:
             return
-        for i, upload in enumerate(matching.only("audio_file")):
-            self.stdout.write(
-                "[bitrate/length] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
-            )
-
-            try:
-                audio_file = upload.get_audio_file()
-                if audio_file:
-                    data = utils.get_audio_file_data(audio_file)
-                    upload.bitrate = data["bitrate"]
-                    upload.duration = data["length"]
-                    upload.save(update_fields=["duration", "bitrate"])
-                else:
-                    self.stderr.write("[bitrate/length] no file found")
-            except Exception as e:
-                self.stderr.write(
-                    "[bitrate/length] error with file #{}: {}".format(upload.pk, str(e))
-                )
+        chunks = common_utils.chunk_queryset(
+            matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+        )
+        handled = 0
+        for chunk in chunks:
+            updated = []
+            for upload in chunk:
+                handled += 1
+                self.stdout.write(
+                    "[bitrate/length] {}/{} fixing file #{}".format(
+                        handled, total, upload.pk
+                    )
+                )
+
+                try:
+                    audio_file = upload.get_audio_file()
+                    data = utils.get_audio_file_data(audio_file)
+                    upload.bitrate = data["bitrate"]
+                    upload.duration = data["length"]
+                except Exception as e:
+                    self.stderr.write(
+                        "[bitrate/length] error with file #{}: {}".format(
+                            upload.pk, str(e)
+                        )
+                    )
+                else:
+                    updated.append(upload)
+
+            models.Upload.objects.bulk_update(updated, ["bitrate", "duration"])

     def fix_file_size(self, dry_run, **kwargs):
         self.stdout.write("Fixing missing size...")
@@ -107,15 +138,64 @@ class Command(BaseCommand):
         self.stdout.write("[size] {} entries found with missing values".format(total))
         if dry_run:
             return
-        for i, upload in enumerate(matching.only("size")):
-            self.stdout.write(
-                "[size] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
-            )
-
-            try:
-                upload.size = upload.get_file_size()
-                upload.save(update_fields=["size"])
-            except Exception as e:
-                self.stderr.write(
-                    "[size] error with file #{}: {}".format(upload.pk, str(e))
-                )
+        chunks = common_utils.chunk_queryset(
+            matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+        )
+        handled = 0
+        for chunk in chunks:
+            updated = []
+            for upload in chunk:
+                handled += 1
+
+                self.stdout.write(
+                    "[size] {}/{} fixing file #{}".format(handled, total, upload.pk)
+                )
+
+                try:
+                    upload.size = upload.get_file_size()
+                except Exception as e:
+                    self.stderr.write(
+                        "[size] error with file #{}: {}".format(upload.pk, str(e))
+                    )
+                else:
+                    updated.append(upload)
+
+            models.Upload.objects.bulk_update(updated, ["size"])
+
+    def fix_file_checksum(self, dry_run, **kwargs):
+        self.stdout.write("Fixing missing checksums...")
+        matching = models.Upload.objects.filter(
+            Q(checksum=None)
+            & (Q(audio_file__isnull=False) | Q(source__startswith="file://"))
+        )
+        total = matching.count()
+        self.stdout.write(
+            "[checksum] {} entries found with missing values".format(total)
+        )
+        if dry_run:
+            return
+        chunks = common_utils.chunk_queryset(
+            matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+        )
+        handled = 0
+        for chunk in chunks:
+            updated = []
+            for upload in chunk:
+                handled += 1
+                self.stdout.write(
+                    "[checksum] {}/{} fixing file #{}".format(handled, total, upload.pk)
+                )
+
+                try:
+                    upload.checksum = common_utils.get_file_hash(
+                        upload.get_audio_file()
+                    )
+                except Exception as e:
+                    self.stderr.write(
+                        "[checksum] error with file #{}: {}".format(upload.pk, str(e))
+                    )
+                else:
+                    updated.append(upload)
+
+            models.Upload.objects.bulk_update(updated, ["checksum"])
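``common_utils.chunk_queryset`` is referenced throughout these methods but is not part of this diff. A minimal sketch of what such a helper could look like — assuming it walks the queryset in ascending primary-key order to keep memory bounded; the real implementation may differ::

    def chunk_queryset(source_qs, chunk_size):
        """Yield lists of at most chunk_size rows, ordered by primary key."""
        qs = source_qs.order_by("pk")
        last_pk = None
        while True:
            if last_pk is not None:
                rows = list(qs.filter(pk__gt=last_pk)[:chunk_size])
            else:
                rows = list(qs[:chunk_size])
            if not rows:
                return
            last_pk = rows[-1].pk
            yield rows

Paired with ``bulk_update``, each batch then costs a single UPDATE statement instead of one ``save()`` per row, which is what makes these backfills tolerable on large databases.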
@@ -1,17 +1,29 @@
 import collections
 import datetime
 import itertools
 import os
-import urllib.parse
+import queue
+import threading
 import time
+import urllib.parse

+import watchdog.events
+import watchdog.observers
+
 from django.conf import settings
 from django.core.files import File
+from django.core.management import call_command
 from django.core.management.base import BaseCommand, CommandError
 from django.db.models import Q
 from django.utils import timezone

+from rest_framework import serializers
+
 from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music import models, tasks, utils


-def crawl_dir(dir, extensions, recursive=True):
+def crawl_dir(dir, extensions, recursive=True, ignored=[]):
     if os.path.isfile(dir):
         yield dir
         return
@@ -20,9 +32,12 @@ def crawl_dir(dir, extensions, recursive=True):
         if entry.is_file():
             for e in extensions:
                 if entry.name.lower().endswith(".{}".format(e.lower())):
-                    yield entry.path
+                    if entry.path not in ignored:
+                        yield entry.path
         elif recursive and entry.is_dir():
-            yield from crawl_dir(entry, extensions, recursive=recursive)
+            yield from crawl_dir(
+                entry, extensions, recursive=recursive, ignored=ignored
+            )


 def batch(iterable, n=1):
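A small usage sketch of the updated generator (hypothetical paths). ``ignored`` is tested with ``in``, so passing a set gives O(1) lookups — and the ``ignored=[]`` mutable default is harmless only because the function never mutates it::

    checked = {"/srv/funkwhale/data/music/already_imported.mp3"}
    for path in crawl_dir(
        "/srv/funkwhale/data/music", extensions=["mp3", "ogg"], ignored=checked
    ):
        print(path)  # files with a matching extension, minus the ignored ones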
@@ -116,6 +131,17 @@ class Command(BaseCommand):
                 "of overhead on your server and on servers you are federating with."
             ),
         )
+        parser.add_argument(
+            "--watch",
+            action="store_true",
+            dest="watch",
+            default=False,
+            help=(
+                "Start the command in watch mode. Instead of running a full import "
+                "and exiting, watch the given path and import new files, remove deleted "
+                "files, and update metadata corresponding to updated files."
+            ),
+        )
         parser.add_argument("-e", "--extension", nargs="+")

         parser.add_argument(
@@ -128,6 +154,15 @@ class Command(BaseCommand):
                 "This causes some overhead, so it's disabled by default."
             ),
         )
+        parser.add_argument(
+            "--prune",
+            action="store_true",
+            dest="prune",
+            default=False,
+            help=(
+                "Once the import is completed, prune tracks, albums and artists that aren't linked to any upload."
+            ),
+        )

         parser.add_argument(
             "--reference",
@@ -157,6 +192,8 @@ class Command(BaseCommand):
         )

     def handle(self, *args, **options):
+        # handle relative directories
+        options["path"] = [os.path.abspath(path) for path in options["path"]]
         self.is_confirmed = False
         try:
             library = models.Library.objects.select_related("actor__user").get(
@@ -182,22 +219,12 @@ class Command(BaseCommand):
             )
             if p and not import_path.startswith(p):
                 raise CommandError(
-                    "Importing in-place only works if importing"
+                    "Importing in-place only works if importing "
                     "from {} (MUSIC_DIRECTORY_PATH), as this directory"
                    "needs to be accessible by the webserver."
                     "Culprit: {}".format(p, import_path)
                 )
-
-        extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
-        crawler = itertools.chain(
-            *[
-                crawl_dir(p, extensions=extensions, recursive=options["recursive"])
-                for p in options["path"]
-            ]
-        )
-        errors = []
-        total = 0
-        start_time = time.time()
         reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())

         import_url = "{}://{}/library/{}/upload?{}"
@@ -212,8 +239,62 @@ class Command(BaseCommand):
                 reference, import_url
             )
         )
+        extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
+        if options["watch"]:
+            if len(options["path"]) > 1:
+                raise CommandError("Watch only works with a single directory")
+
+            return self.setup_watcher(
+                extensions=extensions,
+                path=options["path"][0],
+                reference=reference,
+                library=library,
+                in_place=options["in_place"],
+                prune=options["prune"],
+                recursive=options["recursive"],
+                replace=options["replace"],
+                dispatch_outbox=options["outbox"],
+                broadcast=options["broadcast"],
+            )
+
+        update = True
+        checked_paths = set()
+        if options["in_place"] and update:
+            self.stdout.write("Checking existing files for updates…")
+            message = (
+                "Are you sure you want to do this?\n\n"
+                "Type 'yes' to continue, or 'no' to skip checking for updates in "
+                "already imported files: "
+            )
+            if options["interactive"] and input("".join(message)) != "yes":
+                pass
+            else:
+                checked_paths = check_updates(
+                    stdout=self.stdout,
+                    paths=options["path"],
+                    extensions=extensions,
+                    library=library,
+                    batch_size=options["batch_size"],
+                )
+                self.stdout.write("Existing files checked, moving on to next step!")
+
+        crawler = itertools.chain(
+            *[
+                crawl_dir(
+                    p,
+                    extensions=extensions,
+                    recursive=options["recursive"],
+                    ignored=checked_paths,
+                )
+                for p in options["path"]
+            ]
+        )
+        errors = []
+        total = 0
+        start_time = time.time()
+        batch_start = None
+        batch_duration = None
+        self.stdout.write("Starting import of new files…")
         for i, entries in enumerate(batch(crawler, options["batch_size"])):
             total += len(entries)
             batch_start = time.time()
@@ -225,7 +306,7 @@ class Command(BaseCommand):
             if entries:
                 self.stdout.write(
                     "Handling batch {} ({} items){}".format(
-                        i + 1, options["batch_size"], time_stats,
+                        i + 1, len(entries), time_stats,
                     )
                 )
                 batch_errors = self.handle_batch(
@@ -240,9 +321,9 @@ class Command(BaseCommand):

         batch_duration = time.time() - batch_start

-        message = "Successfully imported {} tracks in {}s"
+        message = "Successfully imported {} new tracks in {}s"
         if options["async_"]:
-            message = "Successfully launched import for {} tracks in {}s"
+            message = "Successfully launched import for {} new tracks in {}s"

         self.stdout.write(
             message.format(total - len(errors), int(time.time() - start_time))
@@ -259,6 +340,12 @@ class Command(BaseCommand):
                 )
             )

+        if options["prune"]:
+            self.stdout.write(
+                "Pruning dangling tracks, albums and artists from library…"
+            )
+            prune()
+
     def handle_batch(self, library, paths, batch, reference, options):
         matching = []
         for m in paths:
@@ -362,15 +449,15 @@ class Command(BaseCommand):
                 message.format(batch=batch, path=path, i=i + 1, total=len(paths))
             )
             try:
-                self.create_upload(
-                    path,
-                    reference,
-                    library,
-                    async_,
-                    options["replace"],
-                    options["in_place"],
-                    options["outbox"],
-                    options["broadcast"],
+                create_upload(
+                    path=path,
+                    reference=reference,
+                    library=library,
+                    async_=async_,
+                    replace=options["replace"],
+                    in_place=options["in_place"],
+                    dispatch_outbox=options["outbox"],
+                    broadcast=options["broadcast"],
                 )
             except Exception as e:
                 if options["exit_on_failure"]:
@@ -382,34 +469,311 @@ class Command(BaseCommand):
             errors.append((path, "{} {}".format(e.__class__.__name__, e)))
         return errors

-    def create_upload(
-        self,
-        path,
-        reference,
-        library,
-        async_,
-        replace,
-        in_place,
-        dispatch_outbox,
-        broadcast,
-    ):
-        import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
-        upload = models.Upload(library=library, import_reference=reference)
-        upload.source = "file://" + path
-        upload.import_metadata = {
-            "funkwhale": {
-                "config": {
-                    "replace": replace,
-                    "dispatch_outbox": dispatch_outbox,
-                    "broadcast": broadcast,
-                }
-            }
-        }
-        if not in_place:
-            name = os.path.basename(path)
-            with open(path, "rb") as f:
-                upload.audio_file.save(name, File(f), save=False)
-
-        upload.save()
-
-        import_handler(upload_id=upload.pk)
+    def setup_watcher(self, path, extensions, recursive, **kwargs):
+        watchdog_queue = queue.Queue()
+        # Set up a worker thread to process the database load
+        worker = threading.Thread(
+            target=process_load_queue(self.stdout, **kwargs), args=(watchdog_queue,),
+        )
+        worker.setDaemon(True)
+        worker.start()
+
+        # set up watchdog to monitor the directory for trigger files
+        patterns = ["*.{}".format(e) for e in extensions]
+        event_handler = Watcher(
+            stdout=self.stdout, queue=watchdog_queue, patterns=patterns,
+        )
+        observer = watchdog.observers.Observer()
+        observer.schedule(event_handler, path, recursive=recursive)
+        observer.start()
+
+        try:
+            while True:
+                self.stdout.write(
+                    "Watching for changes at {}…".format(path), ending="\r"
+                )
+                time.sleep(10)
+                if kwargs["prune"] and GLOBAL["need_pruning"]:
+                    self.stdout.write("Some files were deleted, pruning library…")
+                    prune()
+                    GLOBAL["need_pruning"] = False
+        except KeyboardInterrupt:
+            self.stdout.write("Exiting…")
+            observer.stop()
+
+        observer.join()
+
+
+GLOBAL = {"need_pruning": False}
+
+
+def prune():
+    call_command(
+        "prune_library",
+        dry_run=False,
+        prune_artists=True,
+        prune_albums=True,
+        prune_tracks=True,
+    )
+
+
+def create_upload(
+    path, reference, library, async_, replace, in_place, dispatch_outbox, broadcast,
+):
+    import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
+    upload = models.Upload(library=library, import_reference=reference)
+    upload.source = "file://" + path
+    upload.import_metadata = {
+        "funkwhale": {
+            "config": {
+                "replace": replace,
+                "dispatch_outbox": dispatch_outbox,
+                "broadcast": broadcast,
+            }
+        }
+    }
+    if not in_place:
+        name = os.path.basename(path)
+        with open(path, "rb") as f:
+            upload.audio_file.save(name, File(f), save=False)
+
+    upload.save()
+
+    import_handler(upload_id=upload.pk)
+
+
+def process_load_queue(stdout, **kwargs):
+    def inner(q):
+        # we batch events to avoid calling the same methods multiple times
+        # if a file is modified a lot in a really short time
+        flush_delay = 2
+        batched_events = collections.OrderedDict()
+        while True:
+            while True:
+                if not q.empty():
+                    event = q.get()
+                    batched_events[event["path"]] = event
+                else:
+                    break
+            for path, event in batched_events.copy().items():
+                if time.time() - event["time"] <= flush_delay:
+                    continue
+                now = datetime.datetime.utcnow()
+                stdout.write(
+                    "{} -- Processing {}:{}...\n".format(
+                        now.strftime("%Y/%m/%d %H:%M:%S"), event["type"], event["path"]
+                    )
+                )
+                del batched_events[path]
+                handle_event(event, stdout=stdout, **kwargs)
+            time.sleep(1)
+
+    return inner
+
+
+class Watcher(watchdog.events.PatternMatchingEventHandler):
+    def __init__(self, stdout, queue, patterns):
+        self.stdout = stdout
+        self.queue = queue
+        super().__init__(patterns=patterns)
+
+    def enqueue(self, event):
+        e = {
+            "is_directory": event.is_directory,
+            "type": event.event_type,
+            "path": event.src_path,
+            "src_path": event.src_path,
+            "dest_path": getattr(event, "dest_path", None),
+            "time": time.time(),
+        }
+        self.queue.put(e)
+
+    def on_moved(self, event):
+        self.enqueue(event)
+
+    def on_created(self, event):
+        self.enqueue(event)
+
+    def on_deleted(self, event):
+        self.enqueue(event)
+
+    def on_modified(self, event):
+        self.enqueue(event)
+
+
+def handle_event(event, stdout, **kwargs):
+    handlers = {
+        "modified": handle_modified,
+        "created": handle_created,
+        "moved": handle_moved,
+        "deleted": handle_deleted,
+    }
+    handlers[event["type"]](event=event, stdout=stdout, **kwargs)
+
+
+def handle_modified(event, stdout, library, in_place, **kwargs):
+    existing_candidates = library.uploads.filter(import_status="finished")
+    with open(event["path"], "rb") as f:
+        checksum = common_utils.get_file_hash(f)
+
+    existing = existing_candidates.filter(checksum=checksum).first()
+    if existing:
+        # found an existing file with the same checksum, nothing to do
+        stdout.write("  File already imported and metadata is up-to-date")
+        return
+
+    to_update = None
+    if in_place:
+        source = "file://{}".format(event["path"])
+        to_update = (
+            existing_candidates.in_place()
+            .filter(source=source)
+            .select_related(
+                "track__attributed_to", "track__artist", "track__album__artist",
+            )
+            .first()
+        )
+    if to_update:
+        if (
+            to_update.track.attributed_to
+            and to_update.track.attributed_to != library.actor
+        ):
+            stdout.write(
+                "  Cannot update track metadata, track belongs to someone else"
+            )
+            return
+        else:
+            stdout.write(
+                "  Updating existing file #{} with new metadata…".format(to_update.pk)
+            )
+            audio_metadata = to_update.get_metadata()
+            try:
+                tasks.update_track_metadata(audio_metadata, to_update.track)
+            except serializers.ValidationError as e:
+                stdout.write("  Invalid metadata: {}".format(e))
+            else:
+                to_update.checksum = checksum
+                to_update.save(update_fields=["checksum"])
+            return
+
+    stdout.write("  Launching import for new file")
+    create_upload(
+        path=event["path"],
+        reference=kwargs["reference"],
+        library=library,
+        async_=False,
+        replace=kwargs["replace"],
+        in_place=in_place,
+        dispatch_outbox=kwargs["dispatch_outbox"],
+        broadcast=kwargs["broadcast"],
+    )
+
+
+def handle_created(event, stdout, **kwargs):
+    """
+    Created is essentially an alias for modified: when a file is copied into the
+    watched directory, a created event is fired on the initial touch, followed by
+    many modified events (as the file is written).
+    """
+    return handle_modified(event, stdout, **kwargs)
+
+
+def handle_moved(event, stdout, library, in_place, **kwargs):
+    if not in_place:
+        return
+
+    old_source = "file://{}".format(event["src_path"])
+    new_source = "file://{}".format(event["dest_path"])
+    existing_candidates = library.uploads.filter(import_status="finished")
+    existing_candidates = existing_candidates.in_place().filter(source=old_source)
+    existing = existing_candidates.first()
+    if existing:
+        stdout.write("  Updating path of existing file #{}".format(existing.pk))
+        existing.source = new_source
+        existing.save(update_fields=["source"])
+
+
+def handle_deleted(event, stdout, library, in_place, **kwargs):
+    if not in_place:
+        return
+    source = "file://{}".format(event["path"])
+    existing_candidates = library.uploads.filter(import_status="finished")
+    existing_candidates = existing_candidates.in_place().filter(source=source)
+    if existing_candidates.count():
+        stdout.write("  Removing file from DB")
+        existing_candidates.delete()
+        GLOBAL["need_pruning"] = True
+
+
+def check_updates(stdout, library, extensions, paths, batch_size):
+    existing = (
+        library.uploads.in_place()
+        .filter(import_status="finished")
+        .exclude(checksum=None)
+        .select_related("library", "track")
+    )
+    queries = []
+    checked_paths = set()
+    for path in paths:
+        for ext in extensions:
+            queries.append(
+                Q(source__startswith="file://{}".format(path))
+                & Q(source__endswith=".{}".format(ext))
+            )
+    query, remainder = queries[0], queries[1:]
+    for q in remainder:
+        query = q | query
+    existing = existing.filter(query)
+    total = existing.count()
+    stdout.write("Found {} files to check in database!".format(total))
+    uploads = existing.order_by("source")
+    for i, rows in enumerate(batch(uploads.iterator(), batch_size)):
+        stdout.write("Handling batch {} ({} items)".format(i + 1, len(rows)))
+
+        for upload in rows:
+            check_upload(stdout, upload)
+            checked_paths.add(upload.source.replace("file://", "", 1))
+
+    return checked_paths
+
+
+def check_upload(stdout, upload):
+    try:
+        audio_file = upload.get_audio_file()
+    except FileNotFoundError:
+        stdout.write(
+            "  Removing file #{} missing from disk at {}".format(
+                upload.pk, upload.source
+            )
+        )
+        return upload.delete()
+
+    checksum = common_utils.get_file_hash(audio_file)
+    if upload.checksum != checksum:
+        stdout.write(
+            "  File #{} at {} was modified, updating metadata…".format(
+                upload.pk, upload.source
+            )
+        )
+        if upload.library.actor_id != upload.track.attributed_to_id:
+            stdout.write(
+                "  Cannot update track metadata, track belongs to someone else"
+            )
+        else:
+            track = models.Track.objects.select_related("artist", "album__artist").get(
+                pk=upload.track_id
+            )
+            try:
+                tasks.update_track_metadata(upload.get_metadata(), track)
+            except serializers.ValidationError as e:
+                stdout.write("  Invalid metadata: {}".format(e))
+                return
+            else:
+                upload.checksum = checksum
+                upload.save(update_fields=["checksum"])
@@ -0,0 +1,23 @@
+# Generated by Django 3.0.4 on 2020-05-05 08:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('music', '0051_auto_20200319_1249'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='upload',
+            name='checksum',
+            field=models.CharField(blank=True, db_index=True, max_length=100, null=True),
+        ),
+        migrations.AlterField(
+            model_name='uploadversion',
+            name='mimetype',
+            field=models.CharField(choices=[('audio/mp3', 'mp3'), ('audio/mpeg3', 'mp3'), ('audio/x-mp3', 'mp3'), ('audio/mpeg', 'mp3'), ('video/ogg', 'ogg'), ('audio/ogg', 'ogg'), ('audio/opus', 'opus'), ('audio/x-m4a', 'aac'), ('audio/x-m4a', 'm4a'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50),
+        ),
+    ]
@@ -655,6 +655,14 @@ class Track(APIModelMixin):


 class UploadQuerySet(common_models.NullsLastQuerySet):
+    def in_place(self, include=True):
+        query = models.Q(source__startswith="file://") & (
+            models.Q(audio_file="") | models.Q(audio_file=None)
+        )
+        if not include:
+            query = ~query
+        return self.filter(query)
+
     def playable_by(self, actor, include=True):
         libraries = Library.objects.viewable_by(actor)
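How the new queryset method reads at call sites (hypothetical usage)::

    from funkwhale_api.music.models import Upload

    # uploads referenced in place: file:// source and no copied audio_file
    in_place_uploads = Upload.objects.in_place()

    # the complement: uploads whose audio lives in Funkwhale's own storage
    copied_uploads = Upload.objects.in_place(include=False)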
@@ -754,6 +762,9 @@ class Upload(models.Model):
     )
     downloads_count = models.PositiveIntegerField(default=0)

+    # stores checksums such as `sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
+    checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True)
+
     objects = UploadQuerySet.as_manager()

     @property
@@ -833,7 +844,7 @@ class Upload(models.Model):
     def get_audio_file(self):
         if self.audio_file:
             return self.audio_file.open()
-        if self.source.startswith("file://"):
+        if self.source and self.source.startswith("file://"):
             return open(self.source.replace("file://", "", 1), "rb")

     def get_audio_data(self):
@@ -866,6 +877,15 @@ class Upload(models.Model):
             self.mimetype = mimetypes.guess_type(self.source)[0]
         if not self.size and self.audio_file:
             self.size = self.audio_file.size
+        if not self.checksum:
+            try:
+                audio_file = self.get_audio_file()
+            except FileNotFoundError:
+                pass
+            else:
+                if audio_file:
+                    self.checksum = common_utils.get_file_hash(audio_file)
+
         if not self.pk and not self.fid and self.library.actor.get_user():
             self.fid = self.get_federation_id()
         return super().save(**kwargs)
@@ -851,3 +851,71 @@ def update_library_entity(obj, data):
     obj.save(update_fields=list(data.keys()))

     return obj
+
+
+UPDATE_CONFIG = {
+    "track": {
+        "position": {},
+        "title": {},
+        "mbid": {},
+        "disc_number": {},
+        "copyright": {},
+        "license": {
+            "getter": lambda data, field: licenses.match(
+                data.get("license"), data.get("copyright")
+            )
+        },
+    },
+    "album": {"title": {}, "mbid": {}, "release_date": {}},
+    "artist": {"name": {}, "mbid": {}},
+    "album_artist": {"name": {}, "mbid": {}},
+}
+
+
+@transaction.atomic
+def update_track_metadata(audio_metadata, track):
+    # XXX: implement this to support updating metadata when an imported file
+    # is updated by an outside tool (e.g. Beets).
+    serializer = metadata.TrackMetadataSerializer(data=audio_metadata)
+    serializer.is_valid(raise_exception=True)
+    new_data = serializer.validated_data
+
+    to_update = [
+        ("track", track, lambda data: data),
+        ("album", track.album, lambda data: data["album"]),
+        ("artist", track.artist, lambda data: data["artists"][0]),
+        (
+            "album_artist",
+            track.album.artist if track.album else None,
+            lambda data: data["album"]["artists"][0],
+        ),
+    ]
+    for id, obj, data_getter in to_update:
+        if not obj:
+            continue
+        obj_updated_fields = []
+        try:
+            obj_data = data_getter(new_data)
+        except IndexError:
+            continue
+        for field, config in UPDATE_CONFIG[id].items():
+            getter = config.get(
+                "getter", lambda data, field: data[config.get("field", field)]
+            )
+            try:
+                new_value = getter(obj_data, field)
+            except KeyError:
+                continue
+            old_value = getattr(obj, field)
+            if new_value == old_value:
+                continue
+            obj_updated_fields.append(field)
+            setattr(obj, field, new_value)
+
+        if obj_updated_fields:
+            obj.save(update_fields=obj_updated_fields)
+
+    if track.album and "album" in new_data and new_data["album"].get("cover_data"):
+        common_utils.attach_file(
+            track.album, "attachment_cover", new_data["album"].get("cover_data")
+        )
@@ -83,3 +83,4 @@ service_identity==18.1.0
 markdown>=3.2,<4
 bleach>=3,<4
 feedparser==6.0.0b3
+watchdog==0.10.2
@@ -258,3 +258,12 @@ def test_monkey_patch_request_build_absolute_uri(
     request = fake_request.get("/", **meta)

     assert request.build_absolute_uri(path) == expected
+
+
+def test_get_file_hash(tmpfile, settings):
+    settings.HASHING_ALGORITHM = "sha256"
+    content = b"hello"
+    tmpfile.write(content)
+    # echo -n "hello" | sha256sum
+    expected = "sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
+    assert utils.get_file_hash(tmpfile) == expected
@@ -1,6 +1,7 @@
 import os
 import pytest

+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music.management.commands import check_inplace_files
 from funkwhale_api.music.management.commands import fix_uploads
 from funkwhale_api.music.management.commands import prune_library
@@ -18,7 +19,7 @@ def test_fix_uploads_bitrate_length(factories, mocker):
         return_value={"bitrate": 42, "length": 43},
     )

-    c.fix_file_data(dry_run=False)
+    c.fix_file_data(dry_run=False, batch_size=100)

     upload1.refresh_from_db()
     upload2.refresh_from_db()
@@ -41,7 +42,7 @@ def test_fix_uploads_size(factories, mocker):

     mocker.patch("funkwhale_api.music.models.Upload.get_file_size", return_value=2)

-    c.fix_file_size(dry_run=False)
+    c.fix_file_size(dry_run=False, batch_size=100)

     upload1.refresh_from_db()
     upload2.refresh_from_db()
@@ -69,7 +70,7 @@ def test_fix_uploads_mimetype(factories, mocker):
         mimetype="audio/something",
     )
     c = fix_uploads.Command()
-    c.fix_mimetypes(dry_run=False)
+    c.fix_mimetypes(dry_run=False, batch_size=100)

     upload1.refresh_from_db()
     upload2.refresh_from_db()
@@ -78,6 +79,25 @@ def test_fix_uploads_mimetype(factories, mocker):
     assert upload2.mimetype == "audio/something"


+def test_fix_uploads_checksum(factories, mocker):
+    upload1 = factories["music.Upload"]()
+    upload2 = factories["music.Upload"]()
+    upload1.__class__.objects.filter(pk=upload1.pk).update(checksum="test")
+    upload2.__class__.objects.filter(pk=upload2.pk).update(checksum=None)
+    c = fix_uploads.Command()
+
+    c.fix_file_checksum(dry_run=False, batch_size=100)
+
+    upload1.refresh_from_db()
+    upload2.refresh_from_db()
+
+    # not updated
+    assert upload1.checksum == "test"
+
+    # updated
+    assert upload2.checksum == common_utils.get_file_hash(upload2.audio_file)
+
+
 def test_prune_library_dry_run(factories):
     prunable = factories["music.Track"]()
     not_prunable = factories["music.Track"]()
@@ -5,6 +5,7 @@ import pytest
 from django.utils import timezone
 from django.urls import reverse

+from funkwhale_api.common import utils as common_utils
 from funkwhale_api.music import importers, models, tasks
 from funkwhale_api.federation import utils as federation_utils
@@ -164,6 +165,17 @@ def test_audio_track_mime_type(extention, mimetype, factories):
     assert upload.mimetype == mimetype


+@pytest.mark.parametrize("name", ["test.ogg", "test.mp3"])
+def test_audio_track_checksum(name, factories):
+    path = os.path.join(DATA_DIR, name)
+    upload = factories["music.Upload"](audio_file__from_path=path, mimetype=None)
+
+    with open(path, "rb") as f:
+        expected = common_utils.get_file_hash(f)
+    assert upload.checksum == expected
+
+
 def test_upload_file_name(factories):
     name = "test.mp3"
     path = os.path.join(DATA_DIR, name)
@@ -1329,3 +1329,40 @@ def test_can_import_track_with_same_position_in_same_discs_skipped(factories, mo
     new_upload.refresh_from_db()

     assert new_upload.import_status == "skipped"
+
+
+def test_update_track_metadata(factories):
+    track = factories["music.Track"]()
+    data = {
+        "title": "Peer Gynt Suite no. 1, op. 46: I. Morning",
+        "artist": "Edvard Grieg",
+        "album_artist": "Edvard Grieg; Musopen Symphony Orchestra",
+        "album": "Peer Gynt Suite no. 1, op. 46",
+        "date": "2012-08-15",
+        "position": "4",
+        "disc_number": "2",
+        "musicbrainz_albumid": "a766da8b-8336-47aa-a3ee-371cc41ccc75",
+        "mbid": "bd21ac48-46d8-4e78-925f-d9cc2a294656",
+        "musicbrainz_artistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823",
+        "musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f",
+        "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
+        "copyright": "Someone",
+        "comment": "hello there",
+    }
+    tasks.update_track_metadata(metadata.FakeMetadata(data), track)
+
+    track.refresh_from_db()
+
+    assert track.title == data["title"]
+    assert track.position == int(data["position"])
+    assert track.disc_number == int(data["disc_number"])
+    assert track.license.code == "cc-by-sa-4.0"
+    assert track.copyright == data["copyright"]
+    assert str(track.mbid) == data["mbid"]
+    assert track.album.title == data["album"]
+    assert track.album.release_date == datetime.date(2012, 8, 15)
+    assert str(track.album.mbid) == data["musicbrainz_albumid"]
+    assert track.artist.name == data["artist"]
+    assert str(track.artist.mbid) == data["musicbrainz_artistid"]
+    assert track.album.artist.name == "Edvard Grieg"
+    assert str(track.album.artist.mbid) == "013c8e5b-d72a-4cd3-8dee-6c64d6125823"
@@ -4,6 +4,8 @@ import pytest
 from django.core.management import call_command
 from django.core.management.base import CommandError

+from funkwhale_api.common import utils as common_utils
+from funkwhale_api.music.management.commands import import_files

 DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files")
@@ -159,3 +161,194 @@ def test_import_files_in_place(factories, mocker, settings):
 def test_storage_rename_utf_8_files(factories):
     upload = factories["music.Upload"](audio_file__filename="été.ogg")
     assert upload.audio_file.name.endswith("ete.ogg")
+
+
+@pytest.mark.parametrize("name", ["modified", "moved", "created", "deleted"])
+def test_handle_event(name, mocker):
+    handler = mocker.patch.object(import_files, "handle_{}".format(name))
+
+    event = {"type": name}
+    stdout = mocker.Mock()
+    kwargs = {"hello": "world"}
+    import_files.handle_event(event, stdout, **kwargs)
+
+    handler.assert_called_once_with(event=event, stdout=stdout, **kwargs)
+
+
+def test_handle_created(mocker):
+    handle_modified = mocker.patch.object(import_files, "handle_modified")
+
+    event = mocker.Mock()
+    stdout = mocker.Mock()
+    kwargs = {"hello": "world"}
+    import_files.handle_created(event, stdout, **kwargs)
+
+    handle_modified.assert_called_once_with(event, stdout, **kwargs)
+
+
+def test_handle_deleted(factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": "/path.mp3",
+    }
+    library = factories["music.Library"]()
+    deleted = factories["music.Upload"](
+        library=library,
+        source="file://{}".format(event["path"]),
+        import_status="finished",
+        audio_file=None,
+    )
+    kept = [
+        factories["music.Upload"](
+            library=library,
+            source="file://{}".format(event["path"]),
+            import_status="finished",
+        ),
+        factories["music.Upload"](
+            source="file://{}".format(event["path"]),
+            import_status="finished",
+            audio_file=None,
+        ),
+    ]
+
+    import_files.handle_deleted(
+        event=event, stdout=stdout, library=library, in_place=True
+    )
+
+    with pytest.raises(deleted.DoesNotExist):
+        deleted.refresh_from_db()
+
+    for upload in kept:
+        upload.refresh_from_db()
+
+
+def test_handle_moved(factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "src_path": "/path.mp3",
+        "dest_path": "/new_path.mp3",
+    }
+    library = factories["music.Library"]()
+    updated = factories["music.Upload"](
+        library=library,
+        source="file://{}".format(event["src_path"]),
+        import_status="finished",
+        audio_file=None,
+    )
+    untouched = [
+        factories["music.Upload"](
+            library=library,
+            source="file://{}".format(event["src_path"]),
+            import_status="finished",
+        ),
+        factories["music.Upload"](
+            source="file://{}".format(event["src_path"]),
+            import_status="finished",
+            audio_file=None,
+        ),
+    ]
+
+    import_files.handle_moved(
+        event=event, stdout=stdout, library=library, in_place=True
+    )
+
+    updated.refresh_from_db()
+    assert updated.source == "file://{}".format(event["dest_path"])
+    for upload in untouched:
+        source = upload.source
+        upload.refresh_from_db()
+        assert source == upload.source
+
+
+def test_handle_modified_creates_upload(tmpfile, factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    process_upload = mocker.patch("funkwhale_api.music.tasks.process_upload")
+    library = factories["music.Library"]()
+    import_files.handle_modified(
+        event=event,
+        stdout=stdout,
+        library=library,
+        in_place=True,
+        reference="hello",
+        replace=False,
+        dispatch_outbox=False,
+        broadcast=False,
+    )
+    upload = library.uploads.latest("id")
+    assert upload.source == "file://{}".format(event["path"])
+
+    process_upload.assert_called_once_with(upload_id=upload.pk)
+
+
+def test_handle_modified_skips_existing_checksum(tmpfile, factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    tmpfile.write(b"hello")
+
+    library = factories["music.Library"]()
+    factories["music.Upload"](
+        checksum=common_utils.get_file_hash(tmpfile),
+        library=library,
+        import_status="finished",
+    )
+    import_files.handle_modified(
+        event=event, stdout=stdout, library=library, in_place=True,
+    )
+    assert library.uploads.count() == 1
+
+
+def test_handle_modified_update_existing_path_if_found(tmpfile, factories, mocker):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    update_track_metadata = mocker.patch(
+        "funkwhale_api.music.tasks.update_track_metadata"
+    )
+    get_metadata = mocker.patch("funkwhale_api.music.models.Upload.get_metadata")
+    library = factories["music.Library"]()
+    track = factories["music.Track"](attributed_to=library.actor)
+    upload = factories["music.Upload"](
+        source="file://{}".format(event["path"]),
+        track=track,
+        checksum="old",
+        library=library,
+        import_status="finished",
+        audio_file=None,
+    )
+    import_files.handle_modified(
+        event=event, stdout=stdout, library=library, in_place=True,
+    )
+    update_track_metadata.assert_called_once_with(
+        get_metadata.return_value, upload.track,
+    )
+
+
+def test_handle_modified_update_existing_path_if_found_and_attributed_to(
+    tmpfile, factories, mocker
+):
+    stdout = mocker.Mock()
+    event = {
+        "path": tmpfile.name,
+    }
+    update_track_metadata = mocker.patch(
+        "funkwhale_api.music.tasks.update_track_metadata"
+    )
+    library = factories["music.Library"]()
+    factories["music.Upload"](
+        source="file://{}".format(event["path"]),
+        checksum="old",
+        library=library,
+        track__attributed_to=factories["federation.Actor"](),
+        import_status="finished",
+        audio_file=None,
+    )
+    import_files.handle_modified(
+        event=event, stdout=stdout, library=library, in_place=True,
+    )
+    update_track_metadata.assert_not_called()
@@ -1 +1 @@
-Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetypes`` to set proper mimetypes on existing uploads.
+Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetype`` to set proper mimetypes on existing uploads.
@@ -0,0 +1 @@
+Support a --watch mode with ``import_files`` to automatically add, update and remove files when the filesystem is updated (#721)
@@ -1,15 +1,21 @@
-Importing music
-================
+Importing music from the server
+===============================
+
+From music directory on the server
+----------------------------------

-You can import music files in Funkwhale assuming they are located on the server
-and readable by the Funkwhale application. Your music files should contain at
+Funkwhale can import music files that are located on the server, assuming
+they are readable by the Funkwhale application. Your music files should contain at
 least ``artist``, ``album`` and ``title`` tags, but we recommend you tag
 them extensively using a proper tool, such as Beets or MusicBrainz Picard.

-You can import those tracks as follows, assuming they are located in
+Funkwhale supports two different import modes:
+
+- copy (the default): files are copied into Funkwhale's internal storage. This means importing a 1GB library will result in the same amount of space being used by Funkwhale.
+- :ref:`in-place <in-place-import>` (when the ``--in-place`` flag is provided): files are referenced in Funkwhale's DB but not copied or touched in any way. This is useful if you have a huge library, or one that is updated by an external tool such as Beets.
+
+.. note::
+
+    In Funkwhale 1.0, **the default behaviour will change to in-place import**.
+
+Regardless of the mode you choose, import works as described below, assuming your files are located in
 ``/srv/funkwhale/data/music``:

 .. code-block:: bash
@@ -17,6 +23,17 @@ You can import those tracks as follows, assuming they are located in
     export LIBRARY_ID="<your_library_id>"
     python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput

+.. note::
+    You'll have to create a library in the web UI before you can get your library ID. Simply visit
+    https://yourdomain/content/libraries/ to create one.
+
+    Library IDs are available in library URLs and sharing links. In this example:
+    https://funkwhale.instance/content/libraries/769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1,
+    the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1.
+
+    You can use only the first characters of the ID when calling the command, like this:
+    ``export LIBRARY_ID="769a2bc3"``
+
 When you use docker, the ``/srv/funkwhale/data/music`` directory is mounted from the host
 to the ``/music`` directory on the container:
@@ -32,16 +49,6 @@ When you installed Funkwhale via ansible, you need to call a script instead of P

     export LIBRARY_ID="<your_library_id>"
     /srv/funkwhale/manage import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput

-.. note::
-    You'll have to create a library in the web UI before you can get your library ID. Simply visit
-    https://yourdomain/content/libraries/ to create one.
-
-    Library IDs are available in library URLs and sharing links. In this example:
-    https://funkwhale.instance/content/libraries/769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1,
-    the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1.
-
-    You can use only the first characters of the ID when calling the command, like this:
-    ``export LIBRARY_ID="769a2bc3"``
-
 The import command supports several options, and you can check the help to
 get details::
@@ -63,6 +70,7 @@ get details::
 At the moment, only FLAC, OGG/Vorbis and MP3 files with ID3 tags are supported.


+.. _in-place-import:

 In-place import
@@ -88,14 +96,6 @@ configuration options to ensure the webserver can serve them properly:

 - :ref:`setting-MUSIC_DIRECTORY_PATH`
 - :ref:`setting-MUSIC_DIRECTORY_SERVE_PATH`

-.. warning::
-
-    While in-place import is faster and less disk-space-hungry, it's also
-    more fragile: if, for some reason, you move or rename the source files,
-    Funkwhale will not be able to serve those files anymore.
-
-    Thus, be especially careful when you manipulate the source files.
-
 We recommend you symlink all your music directories into ``/srv/funkwhale/data/music``
 and run the ``import_files`` command from that directory. This will make it possible
 to use multiple music directories without any additional configuration.
@@ -134,6 +134,49 @@ If you want to go with symlinks, ensure each symlinked directory is mounted as a

         # add your symlinked dirs here
         - /media/nfsshare:/media/nfsshare:ro

+Metadata updates
+^^^^^^^^^^^^^^^^
+
+When doing an import in ``in-place`` mode, the importer will also check and update existing entries
+found in the database. For instance, if a file was imported, its ID3 Title tag was then updated, and you rerun a scan,
+Funkwhale will pick up the new title (see the example after this list). The following fields can be updated this way:
+
+- Track mbid
+- Track title
+- Track position and disc number
+- Track license and copyright
+- Album cover
+- Album title
+- Album mbid
+- Album release date
+- Artist name
+- Artist mbid
+- Album artist name
+- Album artist mbid
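For instance, assuming a library ID of ``769a2bc3`` (a hypothetical value), re-running the same in-place import after retagging files will pick the changes up:

.. code-block:: bash

    export LIBRARY_ID="769a2bc3"
    python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput --in-place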
+React to filesystem events with ``--watch``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you have a really big library, or one that is updated quite often, running the ``import_files`` command by hand
+may not be practical. To help with this use case, the ``import_files`` command supports a ``--watch`` flag that observes filesystem events
+instead of performing a full import.
+
+File creation, moves, updates and removals are handled when ``--watch`` is provided (see the example invocation after this list):
+
+- Files created in the watched directory are imported immediately
+- If using ``in-place`` mode, file updates trigger a metadata update on the corresponding entries
+- If using ``in-place`` mode, files that are moved and known by Funkwhale will have their path updated in Funkwhale's DB
+- If using ``in-place`` mode, files that are removed and known by Funkwhale will be removed from Funkwhale's DB
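A hypothetical invocation combining the new flags with an in-place library (the library ID is an example value):

.. code-block:: bash

    export LIBRARY_ID="769a2bc3"
    python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput --in-place --watch --prune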
+Pruning dangling metadata with ``--prune``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Funkwhale is, by design, conservative with music metadata in its database. If you remove a file from Funkwhale's DB,
+the corresponding artist, album and track objects won't be deleted by default.
+
+If you want to prune dangling metadata from the database once the ``import_files`` command is over, simply add the ``--prune`` flag.
+This also works with ``--watch``.
+
 Album covers
 ^^^^^^^^^^^^
@@ -159,9 +202,3 @@ under creative commons (courtesy of Jamendo):
     ./download-tracks.sh music.txt

 This will download a bunch of zip archives (one per album) under the ``data/music`` directory and unzip their content.
-
-From other instances
---------------------
-
-Funkwhale also supports importing music from other instances. Please refer
-to :doc:`../federation/index` for more details.