Fix #777: Added a prune_library management command to remove obsolete metadata
This commit is contained in:
parent
96010917fb
commit
5916a1ba99
|
@ -0,0 +1,145 @@
|
|||
from argparse import RawTextHelpFormatter
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandError
|
||||
|
||||
from django.db import transaction
|
||||
|
||||
from funkwhale_api.music import models, tasks
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that prunes tracks, albums and artists metadata.

    The command runs in dry-run mode unless ``--no-dry-run`` is passed, and
    requires at least one of ``--tracks``, ``--albums`` or ``--artists``.
    """

    help = """
    Remove tracks, albums and artists that are not associated with any file from the instance library:

    - Tracks without uploads are deleted, if the --tracks flag is passed
    - Albums without tracks are deleted, if the --albums flag is passed
    - Artists without albums are deleted, if the --artists flag is passed

    Tracks with associated favorites, playlists or listening won't be deleted
    by default, unless you pass the corresponding --ignore-* flags.

    """

    def create_parser(self, *args, **kwargs):
        """Return the default parser, configured to keep the help text layout."""
        parser = super().create_parser(*args, **kwargs)
        # RawTextHelpFormatter preserves the line breaks of the ``help`` text
        parser.formatter_class = RawTextHelpFormatter
        return parser

    def add_arguments(self, parser):
        """Register the dry-run switch, the pruning targets and the --ignore-* flags."""
        parser.add_argument(
            "--no-dry-run",
            action="store_false",
            dest="dry_run",
            default=True,
            help="Disable dry run mode and apply pruning for real on the database",
        )
        parser.add_argument(
            "--artists",
            action="store_true",
            dest="prune_artists",
            default=False,
            help="Prune artists without albums/tracks",
        )
        parser.add_argument(
            "--albums",
            action="store_true",
            dest="prune_albums",
            default=False,
            help="Prune albums without tracks",
        )
        parser.add_argument(
            "--tracks",
            action="store_true",
            dest="prune_tracks",
            default=False,
            help="Prune tracks without uploads",
        )

        # The --ignore-* flags switch the matching exclude_* option off
        parser.add_argument(
            "--ignore-favorites",
            action="store_false",
            dest="exclude_favorites",
            default=True,
            help="Allow favorited tracks to be pruned",
        )

        parser.add_argument(
            "--ignore-playlists",
            action="store_false",
            dest="exclude_playlists",
            default=True,
            help="Allow tracks included in playlists to be pruned",
        )

        parser.add_argument(
            "--ignore-listenings",
            action="store_false",
            dest="exclude_listenings",
            default=True,
            help="Allow tracks with listening history to be pruned",
        )

    @transaction.atomic
    def handle(self, *args, **options):
        """Prune the requested object types, or only report counts in dry-run mode.

        Reads the ``prune_tracks``, ``prune_albums``, ``prune_artists``,
        ``dry_run`` and ``exclude_*`` keys from ``options``. Tracks are handled
        first, then albums, then artists.

        Raises:
            CommandError: if none of the three prune_* options is set.
        """
        if not any(
            [options["prune_albums"], options["prune_tracks"], options["prune_artists"]]
        ):
            raise CommandError(
                "You need to provide at least one of the --tracks, --albums or --artists flags"
            )

        dry_run = options["dry_run"]
        if dry_run:
            self.stdout.write("Dry-run on, will not commit anything")
        else:
            self.stdout.write("Dry-run off, *pruning for real*")
        self.stdout.write("")

        if options["prune_tracks"]:
            prunable = tasks.get_prunable_tracks(
                exclude_favorites=options["exclude_favorites"],
                exclude_playlists=options["exclude_playlists"],
                exclude_listenings=options["exclude_listenings"],
            )
            self._prune(prunable, models.Track, "tracks", dry_run)

        if options["prune_albums"]:
            self._prune(tasks.get_prunable_albums(), models.Album, "albums", dry_run)

        if options["prune_artists"]:
            self._prune(tasks.get_prunable_artists(), models.Artist, "artists", dry_run)

        self.stdout.write("")
        if dry_run:
            self.stdout.write(
                "Nothing was pruned, rerun this command with --no-dry-run to apply the changes"
            )
        else:
            self.stdout.write("Pruning completed!")

        self.stdout.write("")

    def _prune(self, prunable, model, label, dry_run):
        """Report how many ``model`` rows are prunable and delete them unless dry_run."""
        pruned_total = prunable.count()
        total = model.objects.count()
        if dry_run:
            self.stdout.write(
                "Would prune {}/{} {}".format(pruned_total, total, label)
            )
        else:
            self.stdout.write("Deleting {}/{} {}…".format(pruned_total, total, label))
            prunable.delete()
|
|
@ -568,3 +568,31 @@ def clean_transcoding_cache():
|
|||
.order_by("id")
|
||||
)
|
||||
return candidates.delete()
|
||||
|
||||
|
||||
def get_prunable_tracks(
    exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
):
    """
    Return the queryset of tracks that have no associated upload.

    Each ``exclude_*`` flag, when truthy, additionally leaves out the tracks
    that were favorited, included in playlists or listened, respectively.
    """
    optional_relations = (
        (exclude_favorites, "track_favorites"),
        (exclude_playlists, "playlist_tracks"),
        (exclude_listenings, "listenings"),
    )

    tracks = models.Track.objects.all().filter(uploads__isnull=True)
    for enabled, relation in optional_relations:
        if enabled:
            # one filter() call per relation, applied in the declared order
            tracks = tracks.filter(**{relation + "__isnull": True})

    return tracks
|
||||
|
||||
|
||||
def get_prunable_albums():
    """Return the queryset of albums that have no associated track."""
    albums_without_tracks = models.Album.objects.filter(tracks__isnull=True)
    return albums_without_tracks
|
||||
|
||||
|
||||
def get_prunable_artists():
    """Return the queryset of artists that have neither tracks nor albums."""
    orphan_criteria = {"tracks__isnull": True, "albums__isnull": True}
    return models.Artist.objects.filter(**orphan_criteria)
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import os
|
||||
import pytest
|
||||
|
||||
from funkwhale_api.music.management.commands import fix_uploads
|
||||
from funkwhale_api.music.management.commands import prune_library
|
||||
|
||||
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
@ -73,3 +75,78 @@ def test_fix_uploads_mimetype(factories, mocker):
|
|||
|
||||
assert upload1.mimetype == "audio/mpeg"
|
||||
assert upload2.mimetype == "audio/something"
|
||||
|
||||
|
||||
def test_prune_library_dry_run(factories):
    """A dry run with every prune flag enabled must leave the tracks in place."""
    tracks = [factories["music.Track"](), factories["music.Track"]()]
    command = prune_library.Command()
    command.handle(
        prune_artists=True,
        prune_albums=True,
        prune_tracks=True,
        exclude_favorites=False,
        exclude_listenings=False,
        exclude_playlists=False,
        dry_run=True,
    )

    # nothing pruned, because dry run: every track can still be reloaded
    for track in tracks:
        track.refresh_from_db()
|
||||
|
||||
|
||||
def test_prune_library(factories, mocker):
    """With dry run disabled, exactly the objects returned by the task helpers are deleted."""
    doomed_track = factories["music.Track"]()
    kept_track = factories["music.Track"]()
    get_prunable_tracks = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_tracks",
        return_value=type(doomed_track).objects.filter(pk=doomed_track.pk),
    )

    doomed_album = factories["music.Album"]()
    kept_album = factories["music.Album"]()
    get_prunable_albums = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_albums",
        return_value=type(doomed_album).objects.filter(pk=doomed_album.pk),
    )

    doomed_artist = factories["music.Artist"]()
    kept_artist = factories["music.Artist"]()
    get_prunable_artists = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_artists",
        return_value=type(doomed_artist).objects.filter(pk=doomed_artist.pk),
    )

    # the exclude_* values are opaque mocks: the test only checks they are forwarded
    options = {
        "exclude_favorites": mocker.Mock(),
        "exclude_listenings": mocker.Mock(),
        "exclude_playlists": mocker.Mock(),
        "prune_artists": True,
        "prune_albums": True,
        "prune_tracks": True,
        "dry_run": False,
    }
    command = prune_library.Command()
    command.handle(**options)

    get_prunable_tracks.assert_called_once_with(
        exclude_favorites=options["exclude_favorites"],
        exclude_listenings=options["exclude_listenings"],
        exclude_playlists=options["exclude_playlists"],
    )
    get_prunable_albums.assert_called_once()
    get_prunable_artists.assert_called_once()

    # pruned objects are gone from the database...
    for gone in (doomed_track, doomed_album, doomed_artist):
        with pytest.raises(gone.DoesNotExist):
            gone.refresh_from_db()

    # ...while the others can still be reloaded
    for survivor in (kept_track, kept_album, kept_artist):
        survivor.refresh_from_db()
|
||||
|
|
|
@ -637,3 +637,72 @@ def test_clean_transcoding_cache(preferences, now, factories):
|
|||
|
||||
with pytest.raises(u1.__class__.DoesNotExist):
|
||||
u1.refresh_from_db()
|
||||
|
||||
|
||||
def test_get_prunable_tracks(factories):
    """By default, only the bare track is reported as prunable."""
    expected = factories["music.Track"]()
    # non prunable tracks
    non_prunable_factories = (
        "music.Upload",
        "favorites.TrackFavorite",
        "history.Listening",
        "playlists.PlaylistTrack",
    )
    for factory_name in non_prunable_factories:
        factories[factory_name]()

    found = list(tasks.get_prunable_tracks())
    assert found == [expected]
|
||||
|
||||
|
||||
def test_get_prunable_tracks_include_favorites(factories):
    """With exclude_favorites=False, a favorited track is reported as prunable too."""
    bare_track = factories["music.Track"]()
    favorite = factories["favorites.TrackFavorite"]()
    # non prunable tracks
    factories["favorites.TrackFavorite"](track__playable=True)
    factories["music.Upload"]()
    factories["history.Listening"]()
    factories["playlists.PlaylistTrack"]()

    found = tasks.get_prunable_tracks(exclude_favorites=False)
    assert list(found.order_by("id")) == [bare_track, favorite.track]
|
||||
|
||||
|
||||
def test_get_prunable_tracks_include_playlists(factories):
    """With exclude_playlists=False, a track in a playlist is reported as prunable too."""
    bare_track = factories["music.Track"]()
    playlist_entry = factories["playlists.PlaylistTrack"]()
    # non prunable tracks
    factories["favorites.TrackFavorite"]()
    factories["music.Upload"]()
    factories["history.Listening"]()
    factories["playlists.PlaylistTrack"](track__playable=True)

    found = tasks.get_prunable_tracks(exclude_playlists=False)
    assert list(found.order_by("id")) == [bare_track, playlist_entry.track]
|
||||
|
||||
|
||||
def test_get_prunable_tracks_include_listenings(factories):
    """With exclude_listenings=False, a listened track is reported as prunable too."""
    bare_track = factories["music.Track"]()
    listening = factories["history.Listening"]()
    # non prunable tracks
    factories["favorites.TrackFavorite"]()
    factories["music.Upload"]()
    factories["history.Listening"](track__playable=True)
    factories["playlists.PlaylistTrack"]()

    found = tasks.get_prunable_tracks(exclude_listenings=False)
    assert list(found.order_by("id")) == [bare_track, listening.track]
|
||||
|
||||
|
||||
def test_get_prunable_albums(factories):
    """Only the album created without any track is reported as prunable."""
    prunable_album = factories["music.Album"]()
    # non prunable album: creating a track is enough here, the original
    # trailing ``.album`` attribute access was a no-op expression
    factories["music.Track"]()

    assert list(tasks.get_prunable_albums()) == [prunable_album]
|
||||
|
||||
|
||||
def test_get_prunable_artists(factories):
    """Only the artist with neither track nor album is reported as prunable."""
    orphan = factories["music.Artist"]()
    # non prunable artists: one set on a track, one set on a track's album
    track_artist = factories["music.Artist"]()
    album_artist = factories["music.Artist"]()
    factories["music.Track"](artist=track_artist)
    factories["music.Track"](album__artist=album_artist)

    found = list(tasks.get_prunable_artists())
    assert found == [orphan]
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Added a prune_library management command to remove obsolete metadata from the database (#777)
|
|
@ -34,3 +34,13 @@ enabled in a future release).
|
|||
|
||||
If you want to start building an app on top of Funkwhale's API, please check-out
|
||||
`https://docs.funkwhale.audio/api.html`_ and `https://docs.funkwhale.audio/developers/authentication.html`_.
|
||||
|
||||
Prune library command
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Users are often surprised by Funkwhale's tendency to keep track, album and artist
|
||||
metadata even if no associated files exist.
|
||||
|
||||
To help with that, we now offer a ``prune_library`` management command you can run
|
||||
to purge your database of obsolete entries. `Please refer to our documentation
|
||||
for usage instructions <https://docs.funkwhale.audio/admin/commands.html#pruning-library>`_.
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
Management commands
|
||||
===================
|
||||
|
||||
Pruning library
|
||||
---------------
|
||||
|
||||
Because Funkwhale is a multi-user and federated audio server, we don't delete any artist, album
|
||||
and track objects in the database when you delete the corresponding files.
|
||||
|
||||
This is on purpose, because those objects may be referenced in user playlists, favorites,
|
||||
listening history or on other instances, or other users could have uploaded files
|
||||
linked to those entities in their own private libraries.
|
||||
|
||||
Therefore, Funkwhale has a really conservative approach and doesn't delete metadata when
|
||||
audio files are deleted.
|
||||
|
||||
This behaviour can be problematic in some situations though, e.g. if you imported
|
||||
a lot of wrongly tagged files, then deleted the files to reimport them later.
|
||||
|
||||
To help with that, we provide a management command you can run on the server that will effectively
|
||||
prune your library of track, album and artist metadata that is not tied to any file:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# print help
|
||||
python manage.py prune_library --help
|
||||
|
||||
# prune tracks with no uploads
|
||||
python manage.py prune_library --tracks
|
||||
|
||||
# prune albums with no tracks
|
||||
python manage.py prune_library --albums
|
||||
|
||||
# prune artists with no tracks/albums
|
||||
python manage.py prune_library --artists
|
||||
|
||||
# prune everything (tracks, albums and artists)
|
||||
python manage.py prune_library --tracks --albums --artists
|
||||
|
||||
The ``prune_library`` command will not delete anything by default, and only gives
|
||||
you an estimate of how many database objects would be affected by the pruning.
|
||||
|
||||
Once you have reviewed the output and are comfortable with the changes, you should rerun
|
||||
the command with the ``--no-dry-run`` flag to disable dry run mode and actually apply
|
||||
the changes on the database.
|
||||
|
||||
.. warning::
|
||||
|
||||
Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup,
|
||||
there will be no way to retrieve the deleted data.
|
||||
|
||||
.. note::
|
||||
|
||||
The command will exclude tracks that are favorited, included in playlists or listening
|
||||
history by default. If you want to include those in the pruning process as well,
|
||||
add the corresponding ``--ignore-favorites``, ``--ignore-playlists`` and ``--ignore-listenings``
|
||||
flags.
|
|
@ -22,6 +22,7 @@ Administration
|
|||
:maxdepth: 2
|
||||
|
||||
django
|
||||
commands
|
||||
url
|
||||
upgrading
|
||||
|
||||
|
|
Loading…
Reference in New Issue