From 5916a1ba99ae2e2268d7d2eb48a5f819ac3bf781 Mon Sep 17 00:00:00 2001 From: Eliot Berriot Date: Wed, 27 Mar 2019 17:04:10 +0100 Subject: [PATCH] Fix #777: Added a prune_library management command to remove obsolete metadata --- .../management/commands/prune_library.py | 145 ++++++++++++++++++ api/funkwhale_api/music/tasks.py | 28 ++++ api/tests/music/test_commands.py | 77 ++++++++++ api/tests/music/test_tasks.py | 69 +++++++++ changes/changelog.d/777.enhancement | 1 + changes/notes.rst | 10 ++ docs/admin/commands.rst | 57 +++++++ docs/admin/index.rst | 1 + 8 files changed, 388 insertions(+) create mode 100644 api/funkwhale_api/music/management/commands/prune_library.py create mode 100644 changes/changelog.d/777.enhancement create mode 100644 docs/admin/commands.rst diff --git a/api/funkwhale_api/music/management/commands/prune_library.py b/api/funkwhale_api/music/management/commands/prune_library.py new file mode 100644 index 000000000..e06ee0fdb --- /dev/null +++ b/api/funkwhale_api/music/management/commands/prune_library.py @@ -0,0 +1,145 @@ +from argparse import RawTextHelpFormatter + +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError + +from django.db import transaction + +from funkwhale_api.music import models, tasks + + +class Command(BaseCommand): + help = """ + Remove tracks, albums and artists that are not associated with any file from the instance library: + + - Tracks without uploads are deleted, if the --tracks flag is passed + - Albums without tracks are deleted, if the --albums flag is passed + - Artists without albums are deleted, if the --artists flag is passed + + Tracks with associated favorites, playlists or listening won't be deleted + by default, unless you pass the corresponding --ignore-* flags. 
+ + """ + + def create_parser(self, *args, **kwargs): + parser = super().create_parser(*args, **kwargs) + parser.formatter_class = RawTextHelpFormatter + return parser + + def add_arguments(self, parser): + parser.add_argument( + "--no-dry-run", + action="store_false", + dest="dry_run", + default=True, + help="Disable dry run mode and apply pruning for real on the database", + ) + parser.add_argument( + "--artists", + action="store_true", + dest="prune_artists", + default=False, + help="Prune artists without albums/tracks", + ) + parser.add_argument( + "--albums", + action="store_true", + dest="prune_albums", + default=False, + help="Prune albums without tracks", + ) + parser.add_argument( + "--tracks", + action="store_true", + dest="prune_tracks", + default=False, + help="Prune tracks without uploads", + ) + + parser.add_argument( + "--ignore-favorites", + action="store_false", + dest="exclude_favorites", + default=True, + help="Allow favorited tracks to be pruned", + ) + + parser.add_argument( + "--ignore-playlists", + action="store_false", + dest="exclude_playlists", + default=True, + help="Allow tracks included in playlists to be pruned", + ) + + parser.add_argument( + "--ignore-listenings", + action="store_false", + dest="exclude_listenings", + default=True, + help="Allow tracks with listening history to be pruned", + ) + + @transaction.atomic + def handle(self, *args, **options): + if not any( + [options["prune_albums"], options["prune_tracks"], options["prune_artists"]] + ): + raise CommandError( + "You need to provide at least one of the --tracks, --albums or --artists flags" + ) + + if options["dry_run"]: + self.stdout.write("Dry-run on, will not commit anything") + else: + self.stdout.write("Dry-run off, *pruning for real*") + self.stdout.write("") + if options["prune_tracks"]: + prunable = tasks.get_prunable_tracks( + exclude_favorites=options["exclude_favorites"], + exclude_playlists=options["exclude_playlists"], + 
exclude_listenings=options["exclude_listenings"], + ) + pruned_total = prunable.count() + total = models.Track.objects.count() + if options["dry_run"]: + self.stdout.write( + "Would prune {}/{} tracks".format(pruned_total, total) + ) + else: + self.stdout.write("Deleting {}/{} tracks…".format(pruned_total, total)) + prunable.delete() + + if options["prune_albums"]: + prunable = tasks.get_prunable_albums() + pruned_total = prunable.count() + total = models.Album.objects.count() + if options["dry_run"]: + self.stdout.write( + "Would prune {}/{} albums".format(pruned_total, total) + ) + else: + self.stdout.write("Deleting {}/{} albums…".format(pruned_total, total)) + prunable.delete() + + if options["prune_artists"]: + prunable = tasks.get_prunable_artists() + pruned_total = prunable.count() + total = models.Artist.objects.count() + if options["dry_run"]: + self.stdout.write( + "Would prune {}/{} artists".format(pruned_total, total) + ) + else: + self.stdout.write("Deleting {}/{} artists…".format(pruned_total, total)) + prunable.delete() + + self.stdout.write("") + if options["dry_run"]: + self.stdout.write( + "Nothing was pruned, rerun this command with --no-dry-run to apply the changes" + ) + else: + self.stdout.write("Pruning completed!") + + self.stdout.write("") diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index fc4da9cad..47cb4eb38 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -568,3 +568,31 @@ def clean_transcoding_cache(): .order_by("id") ) return candidates.delete() + + +def get_prunable_tracks( + exclude_favorites=True, exclude_playlists=True, exclude_listenings=True +): + """ + Returns a list of tracks with no associated uploads, + excluding the one that were listened/favorited/included in playlists. 
+ """ + + queryset = models.Track.objects.all() + queryset = queryset.filter(uploads__isnull=True) + if exclude_favorites: + queryset = queryset.filter(track_favorites__isnull=True) + if exclude_playlists: + queryset = queryset.filter(playlist_tracks__isnull=True) + if exclude_listenings: + queryset = queryset.filter(listenings__isnull=True) + + return queryset + + +def get_prunable_albums(): + return models.Album.objects.filter(tracks__isnull=True) + + +def get_prunable_artists(): + return models.Artist.objects.filter(tracks__isnull=True, albums__isnull=True) diff --git a/api/tests/music/test_commands.py b/api/tests/music/test_commands.py index 38186dd7e..22e84beff 100644 --- a/api/tests/music/test_commands.py +++ b/api/tests/music/test_commands.py @@ -1,6 +1,8 @@ import os +import pytest from funkwhale_api.music.management.commands import fix_uploads +from funkwhale_api.music.management.commands import prune_library DATA_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -73,3 +75,78 @@ def test_fix_uploads_mimetype(factories, mocker): assert upload1.mimetype == "audio/mpeg" assert upload2.mimetype == "audio/something" + + +def test_prune_library_dry_run(factories): + prunable = factories["music.Track"]() + not_prunable = factories["music.Track"]() + c = prune_library.Command() + options = { + "prune_artists": True, + "prune_albums": True, + "prune_tracks": True, + "exclude_favorites": False, + "exclude_listenings": False, + "exclude_playlists": False, + "dry_run": True, + } + c.handle(**options) + + for t in [prunable, not_prunable]: + # nothing pruned, because dry run + t.refresh_from_db() + + +def test_prune_library(factories, mocker): + prunable_track = factories["music.Track"]() + not_prunable_track = factories["music.Track"]() + prunable_tracks = prunable_track.__class__.objects.filter(pk=prunable_track.pk) + get_prunable_tracks = mocker.patch( + "funkwhale_api.music.tasks.get_prunable_tracks", return_value=prunable_tracks + ) + + prunable_album = 
factories["music.Album"]() + not_prunable_album = factories["music.Album"]() + prunable_albums = prunable_album.__class__.objects.filter(pk=prunable_album.pk) + get_prunable_albums = mocker.patch( + "funkwhale_api.music.tasks.get_prunable_albums", return_value=prunable_albums + ) + + prunable_artist = factories["music.Artist"]() + not_prunable_artist = factories["music.Artist"]() + prunable_artists = prunable_artist.__class__.objects.filter(pk=prunable_artist.pk) + get_prunable_artists = mocker.patch( + "funkwhale_api.music.tasks.get_prunable_artists", return_value=prunable_artists + ) + + c = prune_library.Command() + options = { + "exclude_favorites": mocker.Mock(), + "exclude_listenings": mocker.Mock(), + "exclude_playlists": mocker.Mock(), + "prune_artists": True, + "prune_albums": True, + "prune_tracks": True, + "dry_run": False, + } + c.handle(**options) + + get_prunable_tracks.assert_called_once_with( + exclude_favorites=options["exclude_favorites"], + exclude_listenings=options["exclude_listenings"], + exclude_playlists=options["exclude_playlists"], + ) + get_prunable_albums.assert_called_once() + get_prunable_artists.assert_called_once() + + with pytest.raises(prunable_track.DoesNotExist): + prunable_track.refresh_from_db() + + with pytest.raises(prunable_album.DoesNotExist): + prunable_album.refresh_from_db() + + with pytest.raises(prunable_artist.DoesNotExist): + prunable_artist.refresh_from_db() + + for o in [not_prunable_track, not_prunable_album, not_prunable_artist]: + o.refresh_from_db() diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index 0fc85a0c1..2ba95209a 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -637,3 +637,72 @@ def test_clean_transcoding_cache(preferences, now, factories): with pytest.raises(u1.__class__.DoesNotExist): u1.refresh_from_db() + + +def test_get_prunable_tracks(factories): + prunable_track = factories["music.Track"]() + # non prunable tracks + 
factories["music.Upload"]() + factories["favorites.TrackFavorite"]() + factories["history.Listening"]() + factories["playlists.PlaylistTrack"]() + + assert list(tasks.get_prunable_tracks()) == [prunable_track] + + +def test_get_prunable_tracks_include_favorites(factories): + prunable_track = factories["music.Track"]() + favorited = factories["favorites.TrackFavorite"]().track + # non prunable tracks + factories["favorites.TrackFavorite"](track__playable=True) + factories["music.Upload"]() + factories["history.Listening"]() + factories["playlists.PlaylistTrack"]() + + qs = tasks.get_prunable_tracks(exclude_favorites=False).order_by("id") + assert list(qs) == [prunable_track, favorited] + + +def test_get_prunable_tracks_include_playlists(factories): + prunable_track = factories["music.Track"]() + in_playlist = factories["playlists.PlaylistTrack"]().track + # non prunable tracks + factories["favorites.TrackFavorite"]() + factories["music.Upload"]() + factories["history.Listening"]() + factories["playlists.PlaylistTrack"](track__playable=True) + + qs = tasks.get_prunable_tracks(exclude_playlists=False).order_by("id") + assert list(qs) == [prunable_track, in_playlist] + + +def test_get_prunable_tracks_include_listenings(factories): + prunable_track = factories["music.Track"]() + listened = factories["history.Listening"]().track + # non prunable tracks + factories["favorites.TrackFavorite"]() + factories["music.Upload"]() + factories["history.Listening"](track__playable=True) + factories["playlists.PlaylistTrack"]() + + qs = tasks.get_prunable_tracks(exclude_listenings=False).order_by("id") + assert list(qs) == [prunable_track, listened] + + +def test_get_prunable_albums(factories): + prunable_album = factories["music.Album"]() + # non prunable album + factories["music.Track"]().album + + assert list(tasks.get_prunable_albums()) == [prunable_album] + + +def test_get_prunable_artists(factories): + prunable_artist = factories["music.Artist"]() + # non prunable artist + 
+ non_prunable_artist = factories["music.Artist"]() + non_prunable_album_artist = factories["music.Artist"]() + factories["music.Track"](artist=non_prunable_artist) + factories["music.Track"](album__artist=non_prunable_album_artist) + + assert list(tasks.get_prunable_artists()) == [prunable_artist] diff --git a/changes/changelog.d/777.enhancement b/changes/changelog.d/777.enhancement new file mode 100644 index 000000000..96a46409e --- /dev/null +++ b/changes/changelog.d/777.enhancement @@ -0,0 +1 @@ +Added a prune_library management command to remove obsolete metadata from the database (#777) diff --git a/changes/notes.rst b/changes/notes.rst index 22e626d96..8f5ee15f7 100644 --- a/changes/notes.rst +++ b/changes/notes.rst @@ -34,3 +34,13 @@ enabled in a future release). If you want to start building an app on top of Funkwhale's API, please check-out `https://docs.funkwhale.audio/api.html`_ and `https://docs.funkwhale.audio/developers/authentication.html`_. + +Prune library command +^^^^^^^^^^^^^^^^^^^^^ + +Users are often surprised by Funkwhale's tendency to keep track, album and artist +metadata even if no associated files exist. + +To help with that, we now offer a ``prune_library`` management command you can run +to purge obsolete entries from your database. `Please refer to our documentation +for usage instructions <https://docs.funkwhale.audio/admin/commands.html#pruning-library>`_. diff --git a/docs/admin/commands.rst b/docs/admin/commands.rst new file mode 100644 index 000000000..1525804da --- /dev/null +++ b/docs/admin/commands.rst @@ -0,0 +1,57 @@ +Management commands +=================== + +Pruning library +--------------- + +Because Funkwhale is a multi-user and federated audio server, we don't delete any artist, album +and track objects in the database when you delete the corresponding files. 
+ +This is on purpose, because those objects may be referenced in user playlists, favorites, +listening history or on other instances, or other users could have uploaded files that are +linked to those entities in their own private libraries. + +Therefore, Funkwhale has a really conservative approach and doesn't delete metadata when +audio files are deleted. + +This behaviour can be problematic in some situations though, e.g. if you imported +a lot of wrongly tagged files, then deleted the files to reimport them later. + +To help with that, we provide a management command you can run on the server and that will effectively +prune your library from track, album and artist metadata that is not tied to any file: + +.. code-block:: sh + + # print help + python manage.py prune_library --help + + # prune tracks with no uploads + python manage.py prune_library --tracks + + # prune albums with no tracks + python manage.py prune_library --albums + + # prune artists with no tracks/albums + python manage.py prune_library --artists + + # prune everything (tracks, albums and artists) + python manage.py prune_library --tracks --albums --artists + +The ``prune_library`` command will not delete anything by default, and only gives +you an estimate of how many database objects would be affected by the pruning. + +Once you have reviewed the output and are comfortable with the changes, you should rerun +the command with the ``--no-dry-run`` flag to disable dry run mode and actually apply +the changes on the database. + +.. warning:: + + Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup, + there will be no way to retrieve the deleted data. + +.. note:: + + The command will exclude tracks that are favorited, included in playlists or listening + history by default. If you want to include those in the pruning process as well, + add the corresponding ``--ignore-favorites``, ``--ignore-playlists`` and ``--ignore-listenings`` + flags. 
diff --git a/docs/admin/index.rst b/docs/admin/index.rst index a385a2e5e..55f6bbf56 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -22,6 +22,7 @@ Administration :maxdepth: 2 django + commands url upgrading