diff --git a/api/funkwhale_api/music/management/commands/check_inplace_files.py b/api/funkwhale_api/music/management/commands/check_inplace_files.py
new file mode 100644
index 000000000..f274ee589
--- /dev/null
+++ b/api/funkwhale_api/music/management/commands/check_inplace_files.py
@@ -0,0 +1,99 @@
+import os
+from argparse import RawTextHelpFormatter
+
+from django.core.management.base import BaseCommand
+
+from django.db import transaction
+from django.db.models import Q
+
+from funkwhale_api.music import models
+
+
+def progress(buffer, count, total, status=""):
+    """
+    Write a one-line text progress bar to ``buffer``.
+
+    ``buffer`` only needs ``write()`` and ``flush()`` methods. ``count`` is the
+    number of items processed so far out of ``total``, and ``status`` is an
+    optional text appended after the counter.
+    """
+    bar_len = 60
+    if total:
+        filled_len = int(round(bar_len * count / float(total)))
+    else:
+        # nothing to process: show a full bar instead of dividing by zero
+        filled_len = bar_len
+
+    bar = "=" * filled_len + "-" * (bar_len - filled_len)
+
+    # the line ends with a carriage return, not a newline
+    buffer.write("[%s] %s/%s ...%s\r" % (bar, count, total, status))
+    buffer.flush()
+
+
+class Command(BaseCommand):
+    help = """
+    Loop through all in-place imported files in the database, and verify
+    that the corresponding files are present on the filesystem. If some files are not
+    found and --no-dry-run is specified, the corresponding database objects will be deleted.
+    """
+
+    def create_parser(self, *args, **kwargs):
+        parser = super().create_parser(*args, **kwargs)
+        # keep the line breaks of the help text when rendering --help
+        parser.formatter_class = RawTextHelpFormatter
+        return parser
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--no-dry-run",
+            action="store_false",
+            dest="dry_run",
+            default=True,
+            help="Disable dry run mode and apply pruning for real on the database",
+        )
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        """
+        List in-place imported uploads whose file is missing on disk, and
+        delete them from the database unless dry run mode is enabled.
+        """
+        prefix = "file://"
+        candidates = models.Upload.objects.filter(source__startswith=prefix)
+        # SQL "IN" never matches NULL, so the NULL case needs its own condition
+        candidates = candidates.filter(Q(audio_file="") | Q(audio_file__isnull=True))
+        total = candidates.count()
+        self.stdout.write("Checking {} in-place imported files…".format(total))
+
+        missing = []
+        for i, row in enumerate(candidates.values("id", "source")):
+            # strip the leading scheme only: str.replace() would also alter
+            # a path that contains "file://" further down
+            path = row["source"][len(prefix) :]
+            progress(self.stdout, i + 1, total)
+            if not os.path.exists(path):
+                missing.append((path, row["id"]))
+
+        if not missing:
+            self.stdout.write("All in-place imports have a matching on-disk file")
+            return
+
+        for path, _ in missing:
+            self.stdout.write(" {}".format(path))
+        self.stdout.write(
+            "The previous {} paths are referenced in database, but not found on disk!".format(
+                len(missing)
+            )
+        )
+
+        # dry run stays the default when handle() is called without the option
+        if options.get("dry_run", True):
+            self.stdout.write(
+                "Nothing was deleted, rerun this command with --no-dry-run to apply the changes"
+            )
+            return
+
+        to_delete = candidates.filter(pk__in=[upload_id for _, upload_id in missing])
+        self.stdout.write("Deleting {} uploads…".format(to_delete.count()))
+        to_delete.delete()
diff --git a/api/tests/music/test_commands.py b/api/tests/music/test_commands.py
index 22e84beff..a08f1b10b 100644
--- a/api/tests/music/test_commands.py
+++ b/api/tests/music/test_commands.py
@@ -1,6 +1,7 @@
 import os
 import pytest
 
+from funkwhale_api.music.management.commands import check_inplace_files
 from funkwhale_api.music.management.commands import fix_uploads
 from funkwhale_api.music.management.commands import prune_library
 
@@ -150,3 +151,35 @@ def
test_prune_library(factories, mocker): for o in [not_prunable_track, not_prunable_album, not_prunable_artist]: o.refresh_from_db() + + +def test_check_inplace_files_dry_run(factories, tmpfile): + prunable = factories["music.Upload"](source="file:///notfound", audio_file=None) + not_prunable = factories["music.Upload"]( + source="file://{}".format(tmpfile.name), audio_file=None + ) + c = check_inplace_files.Command() + c.handle(dry_run=True) + + for u in [prunable, not_prunable]: + # nothing pruned, because dry run + u.refresh_from_db() + + +def test_check_inplace_files_no_dry_run(factories, tmpfile): + prunable = factories["music.Upload"](source="file:///notfound", audio_file=None) + not_prunable = [ + factories["music.Upload"]( + source="file://{}".format(tmpfile.name), audio_file=None + ), + factories["music.Upload"](source="upload://"), + factories["music.Upload"](source="https://"), + ] + c = check_inplace_files.Command() + c.handle(dry_run=False) + + with pytest.raises(prunable.DoesNotExist): + prunable.refresh_from_db() + + for u in not_prunable: + u.refresh_from_db() diff --git a/changes/changelog.d/781.enhancement b/changes/changelog.d/781.enhancement new file mode 100644 index 000000000..e3dd2597a --- /dev/null +++ b/changes/changelog.d/781.enhancement @@ -0,0 +1 @@ +Added a `check_inplace_files` management command to purge the database from references to in-place imported files that don't exist on disk anymore (#781) diff --git a/changes/notes.rst b/changes/notes.rst index 858ed5eeb..aa7935116 100644 --- a/changes/notes.rst +++ b/changes/notes.rst @@ -51,5 +51,16 @@ Users are often surprised by Funkwhale's tendency to keep track, album and artis metadata even if no associated files exist. To help with that, we now offer a ``prune_library`` management command you can run -to purge your database from obsolete entry. `Please refer to our documentation +to purge your database from obsolete entries. 
`Please refer to our documentation for usage instructions `_. + +Check in-place files command +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When using in-place import with a living audio library, you'll quite often rename or +remove files from the file system. Unfortunately, Funkwhale keeps a reference to those +files in the database, which results in unplayable tracks. + +To help with that, we now offer a ``check_inplace_files`` management command you can run +to purge your database from obsolete files. `Please refer to our documentation +for usage instructions `_. diff --git a/docs/admin/commands.rst b/docs/admin/commands.rst index 1525804da..c30a67a99 100644 --- a/docs/admin/commands.rst +++ b/docs/admin/commands.rst @@ -55,3 +55,28 @@ the changes on the database. history by default. If you want to include those in the pruning process as well, add the corresponding ``--ignore-favorites``, ``--ignore-playlists`` and ``--ignore-listenings`` flags. + +Remove obsolete files from database +----------------------------------- + +When importing using the :ref:`in-place method `, if you move or remove +in-place imported files on disk, Funkwhale will still have a reference to those files and won't +be able to serve them properly. + +To help with that, whenever you remove or move files that were previously imported +with the ``--in-place`` flag, you can run the following command:: + + python manage.py check_inplace_files + +This command will loop through all the database objects that reference +an in-place imported file, check that the file is accessible on disk, +and list the objects whose file is missing. By default, nothing is deleted. + +Once you have reviewed the output and are comfortable with the changes, you should rerun +the command with the ``--no-dry-run`` flag to disable dry run mode and actually delete the +database objects. + +.. warning:: + + Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup, + there will be no way to retrieve the deleted data.