Merge branch '777-library-pruning' into 'develop'
Fix #777: Added a prune_library management command to remove obsolete metadata Closes #777 See merge request funkwhale/funkwhale!698
This commit is contained in:
commit
7bb0fa2e64
|
@ -0,0 +1,145 @@
|
||||||
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.core.management.base import CommandError
|
||||||
|
|
||||||
|
from django.db import transaction
|
||||||
|
|
||||||
|
from funkwhale_api.music import models, tasks
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command removing library metadata that has no backing file.

    Tracks, albums and artists are pruned independently, each one only when
    its flag is passed. The command defaults to a dry run that only reports
    counts; ``--no-dry-run`` must be passed for rows to be deleted.
    """

    help = """
    Remove tracks, albums and artists that are not associated with any file from the instance library:

    - Tracks without uploads are deleted, if the --tracks flag is passed
    - Albums without tracks are deleted, if the --albums flag is passed
    - Artists without albums are deleted, if the --artists flag is passed

    Tracks with associated favorites, playlists or listening won't be deleted
    by default, unless you pass the corresponding --ignore-* flags.

    """

    def create_parser(self, *args, **kwargs):
        """Return the default parser, switched to a raw-text help formatter.

        ``RawTextHelpFormatter`` keeps the line breaks and bullet list of
        ``help`` as written instead of re-wrapping them.
        """
        parser = super().create_parser(*args, **kwargs)
        parser.formatter_class = RawTextHelpFormatter
        return parser

    def add_arguments(self, parser):
        """Register the command-line flags on ``parser``.

        The ``--no-dry-run`` and ``--ignore-*`` flags are ``store_false``
        switches: their destinations default to ``True`` and passing the flag
        turns the corresponding safety off.
        """
        parser.add_argument(
            "--no-dry-run",
            action="store_false",
            dest="dry_run",
            default=True,
            help="Disable dry run mode and apply pruning for real on the database",
        )
        parser.add_argument(
            "--artists",
            action="store_true",
            dest="prune_artists",
            default=False,
            help="Prune artists without albums/tracks",
        )
        parser.add_argument(
            "--albums",
            action="store_true",
            dest="prune_albums",
            default=False,
            help="Prune albums without tracks",
        )
        parser.add_argument(
            "--tracks",
            action="store_true",
            dest="prune_tracks",
            default=False,
            help="Prune tracks without uploads",
        )

        parser.add_argument(
            "--ignore-favorites",
            action="store_false",
            dest="exclude_favorites",
            default=True,
            help="Allow favorited tracks to be pruned",
        )

        parser.add_argument(
            "--ignore-playlists",
            action="store_false",
            dest="exclude_playlists",
            default=True,
            help="Allow tracks included in playlists to be pruned",
        )

        parser.add_argument(
            "--ignore-listenings",
            action="store_false",
            dest="exclude_listenings",
            default=True,
            help="Allow tracks with listening history to be pruned",
        )

    def _prune(self, label, prunable, model, dry_run):
        """Report how many ``model`` rows ``prunable`` matches, deleting them unless ``dry_run``.

        ``label`` is the plural noun used in the output ("tracks", "albums",
        "artists"). ``prunable`` is the queryset of candidates for deletion.
        """
        pruned_total = prunable.count()
        total = model.objects.count()
        if dry_run:
            self.stdout.write(
                "Would prune {}/{} {}".format(pruned_total, total, label)
            )
        else:
            self.stdout.write(
                "Deleting {}/{} {}…".format(pruned_total, total, label)
            )
            prunable.delete()

    @transaction.atomic
    def handle(self, *args, **options):
        """Prune the requested kinds of objects, or only report counts in dry-run mode.

        Runs inside ``transaction.atomic``. Tracks are processed first, then
        albums, then artists.

        Raises:
            CommandError: if none of --tracks, --albums or --artists was passed.
        """
        if not any(
            [options["prune_albums"], options["prune_tracks"], options["prune_artists"]]
        ):
            raise CommandError(
                "You need to provide at least one of the --tracks, --albums or --artists flags"
            )

        dry_run = options["dry_run"]
        if dry_run:
            self.stdout.write("Dry-run on, will not commit anything")
        else:
            self.stdout.write("Dry-run off, *pruning for real*")
        self.stdout.write("")

        # The helpers are looked up on ``tasks`` at call time so that they
        # can be patched on that module.
        if options["prune_tracks"]:
            prunable_tracks = tasks.get_prunable_tracks(
                exclude_favorites=options["exclude_favorites"],
                exclude_playlists=options["exclude_playlists"],
                exclude_listenings=options["exclude_listenings"],
            )
            self._prune("tracks", prunable_tracks, models.Track, dry_run)

        if options["prune_albums"]:
            self._prune("albums", tasks.get_prunable_albums(), models.Album, dry_run)

        if options["prune_artists"]:
            self._prune(
                "artists", tasks.get_prunable_artists(), models.Artist, dry_run
            )

        self.stdout.write("")
        if dry_run:
            self.stdout.write(
                "Nothing was pruned, rerun this command with --no-dry-run to apply the changes"
            )
        else:
            self.stdout.write("Pruning completed!")

        self.stdout.write("")
|
|
@ -568,3 +568,31 @@ def clean_transcoding_cache():
|
||||||
.order_by("id")
|
.order_by("id")
|
||||||
)
|
)
|
||||||
return candidates.delete()
|
return candidates.delete()
|
||||||
|
|
||||||
|
|
||||||
|
def get_prunable_tracks(
    exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
):
    """
    Return a queryset of the tracks that have no associated upload.

    Each ``exclude_*`` flag, when truthy, additionally drops the tracks that
    are favorited, included in a playlist or present in the listening history.
    """
    optional_lookups = (
        (exclude_favorites, "track_favorites__isnull"),
        (exclude_playlists, "playlist_tracks__isnull"),
        (exclude_listenings, "listenings__isnull"),
    )

    tracks = models.Track.objects.all().filter(uploads__isnull=True)
    # One chained .filter() call per enabled exclusion.
    for enabled, lookup in optional_lookups:
        if enabled:
            tracks = tracks.filter(**{lookup: True})

    return tracks
|
||||||
|
|
||||||
|
|
||||||
|
def get_prunable_albums():
    """Return a queryset of the albums that have no track."""
    albums = models.Album.objects
    return albums.filter(tracks__isnull=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_prunable_artists():
    """Return a queryset of the artists that have neither tracks nor albums."""
    orphan_lookups = {"tracks__isnull": True, "albums__isnull": True}
    return models.Artist.objects.filter(**orphan_lookups)
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import os
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
from funkwhale_api.music.management.commands import fix_uploads
|
from funkwhale_api.music.management.commands import fix_uploads
|
||||||
|
from funkwhale_api.music.management.commands import prune_library
|
||||||
|
|
||||||
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
|
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
@ -73,3 +75,78 @@ def test_fix_uploads_mimetype(factories, mocker):
|
||||||
|
|
||||||
assert upload1.mimetype == "audio/mpeg"
|
assert upload1.mimetype == "audio/mpeg"
|
||||||
assert upload2.mimetype == "audio/something"
|
assert upload2.mimetype == "audio/something"
|
||||||
|
|
||||||
|
|
||||||
|
def test_prune_library_dry_run(factories):
    """A dry run with every prune flag enabled must leave all rows in place."""
    tracks = [factories["music.Track"]() for _ in range(2)]
    command = prune_library.Command()
    command.handle(
        prune_artists=True,
        prune_albums=True,
        prune_tracks=True,
        exclude_favorites=False,
        exclude_listenings=False,
        exclude_playlists=False,
        dry_run=True,
    )

    # refresh_from_db() would raise DoesNotExist for a deleted row, so
    # completing this loop shows that nothing was pruned.
    for track in tracks:
        track.refresh_from_db()
|
||||||
|
|
||||||
|
|
||||||
|
def test_prune_library(factories, mocker):
    """With dry run off, exactly the rows returned by the patched helpers are deleted."""
    doomed_track = factories["music.Track"]()
    kept_track = factories["music.Track"]()
    tracks_helper = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_tracks",
        return_value=doomed_track.__class__.objects.filter(pk=doomed_track.pk),
    )

    doomed_album = factories["music.Album"]()
    kept_album = factories["music.Album"]()
    albums_helper = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_albums",
        return_value=doomed_album.__class__.objects.filter(pk=doomed_album.pk),
    )

    doomed_artist = factories["music.Artist"]()
    kept_artist = factories["music.Artist"]()
    artists_helper = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_artists",
        return_value=doomed_artist.__class__.objects.filter(pk=doomed_artist.pk),
    )

    # Opaque sentinels: the command must forward them untouched to the helper.
    exclusions = {
        "exclude_favorites": mocker.Mock(),
        "exclude_listenings": mocker.Mock(),
        "exclude_playlists": mocker.Mock(),
    }
    command = prune_library.Command()
    command.handle(
        prune_artists=True,
        prune_albums=True,
        prune_tracks=True,
        dry_run=False,
        **exclusions
    )

    tracks_helper.assert_called_once_with(**exclusions)
    albums_helper.assert_called_once()
    artists_helper.assert_called_once()

    # Every object returned by a helper is gone...
    for deleted in (doomed_track, doomed_album, doomed_artist):
        with pytest.raises(deleted.DoesNotExist):
            deleted.refresh_from_db()

    # ...while the others can still be reloaded.
    for survivor in (kept_track, kept_album, kept_artist):
        survivor.refresh_from_db()
|
||||||
|
|
|
@ -637,3 +637,72 @@ def test_clean_transcoding_cache(preferences, now, factories):
|
||||||
|
|
||||||
with pytest.raises(u1.__class__.DoesNotExist):
|
with pytest.raises(u1.__class__.DoesNotExist):
|
||||||
u1.refresh_from_db()
|
u1.refresh_from_db()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_prunable_tracks(factories):
    """By default only the track without any related object is prunable."""
    expected = factories["music.Track"]()
    # non prunable tracks: one per kind of related object
    for related in (
        "music.Upload",
        "favorites.TrackFavorite",
        "history.Listening",
        "playlists.PlaylistTrack",
    ):
        factories[related]()

    found = list(tasks.get_prunable_tracks())
    assert found == [expected]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_prunable_tracks_include_favorites(factories):
    """With exclude_favorites=False, a favorited track becomes prunable."""
    bare_track = factories["music.Track"]()
    favorited_track = factories["favorites.TrackFavorite"]().track

    # non prunable tracks
    factories["favorites.TrackFavorite"](track__playable=True)
    factories["music.Upload"]()
    factories["history.Listening"]()
    factories["playlists.PlaylistTrack"]()

    found = tasks.get_prunable_tracks(exclude_favorites=False).order_by("id")
    assert list(found) == [bare_track, favorited_track]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_prunable_tracks_include_playlists(factories):
    """With exclude_playlists=False, a track included in a playlist becomes prunable."""
    bare_track = factories["music.Track"]()
    playlist_track = factories["playlists.PlaylistTrack"]().track

    # non prunable tracks
    factories["favorites.TrackFavorite"]()
    factories["music.Upload"]()
    factories["history.Listening"]()
    factories["playlists.PlaylistTrack"](track__playable=True)

    found = tasks.get_prunable_tracks(exclude_playlists=False).order_by("id")
    assert list(found) == [bare_track, playlist_track]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_prunable_tracks_include_listenings(factories):
    """With exclude_listenings=False, a track with listening history becomes prunable."""
    bare_track = factories["music.Track"]()
    listened_track = factories["history.Listening"]().track

    # non prunable tracks
    factories["favorites.TrackFavorite"]()
    factories["music.Upload"]()
    factories["history.Listening"](track__playable=True)
    factories["playlists.PlaylistTrack"]()

    found = tasks.get_prunable_tracks(exclude_listenings=False).order_by("id")
    assert list(found) == [bare_track, listened_track]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_prunable_albums(factories):
    """Only the album without any track is reported as prunable."""
    empty_album = factories["music.Album"]()
    # non prunable album: the one attached to a freshly created track
    factories["music.Track"]().album

    found = list(tasks.get_prunable_albums())
    assert found == [empty_album]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_prunable_artists(factories):
    """An artist is prunable only when it has neither tracks nor albums."""
    orphan_artist = factories["music.Artist"]()

    # non prunable artists: one credited on a track, one owning an album
    track_artist = factories["music.Artist"]()
    album_artist = factories["music.Artist"]()
    factories["music.Track"](artist=track_artist)
    factories["music.Track"](album__artist=album_artist)

    found = list(tasks.get_prunable_artists())
    assert found == [orphan_artist]
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Added a prune_library management command to remove obsolete metadata from the database (#777)
|
|
@ -34,3 +34,13 @@ enabled in a future release).
|
||||||
|
|
||||||
If you want to start building an app on top of Funkwhale's API, please check-out
|
If you want to start building an app on top of Funkwhale's API, please check-out
|
||||||
`https://docs.funkwhale.audio/api.html`_ and `https://docs.funkwhale.audio/developers/authentication.html`_.
|
`https://docs.funkwhale.audio/api.html`_ and `https://docs.funkwhale.audio/developers/authentication.html`_.
|
||||||
|
|
||||||
|
Prune library command
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Users are often surprised by Funkwhale's tendency to keep track, album and artist
|
||||||
|
metadata even if no associated files exist.
|
||||||
|
|
||||||
|
To help with that, we now offer a ``prune_library`` management command you can run
|
||||||
|
to purge your database of obsolete entries. `Please refer to our documentation
|
||||||
|
for usage instructions <https://docs.funkwhale.audio/admin/commands.html#pruning-library>`_.
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
Management commands
|
||||||
|
===================
|
||||||
|
|
||||||
|
Pruning library
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Because Funkwhale is a multi-user and federated audio server, we don't delete any artist, album
|
||||||
|
and track objects in the database when you delete the corresponding files.
|
||||||
|
|
||||||
|
This is on purpose, because those objects may be referenced in user playlists, favorites,
|
||||||
|
listening history or on other instances, or other users could have uploaded files
|
||||||
|
linked to those entities in their own private libraries.
|
||||||
|
|
||||||
|
Therefore, Funkwhale has a really conservative approach and doesn't delete metadata when
|
||||||
|
audio files are deleted.
|
||||||
|
|
||||||
|
This behaviour can be problematic in some situations though, e.g. if you imported
|
||||||
|
a lot of wrongly tagged files, then deleted the files to reimport them later.
|
||||||
|
|
||||||
|
To help with that, we provide a management command you can run on the server that will effectively
|
||||||
|
prune your library of track, album and artist metadata that is not tied to any file:
|
||||||
|
|
||||||
|
.. code-block:: sh
|
||||||
|
|
||||||
|
# print help
|
||||||
|
python manage.py prune_library --help
|
||||||
|
|
||||||
|
# prune tracks with no uploads
|
||||||
|
python manage.py prune_library --tracks
|
||||||
|
|
||||||
|
# prune albums with no tracks
|
||||||
|
python manage.py prune_library --albums
|
||||||
|
|
||||||
|
# prune artists with no tracks/albums
|
||||||
|
python manage.py prune_library --artists
|
||||||
|
|
||||||
|
# prune everything (tracks, albums and artists)
|
||||||
|
python manage.py prune_library --tracks --albums --artists
|
||||||
|
|
||||||
|
The ``prune_library`` command will not delete anything by default, and only gives
|
||||||
|
you an estimate of how many database objects would be affected by the pruning.
|
||||||
|
|
||||||
|
Once you have reviewed the output and are comfortable with the changes, you should rerun
|
||||||
|
the command with the ``--no-dry-run`` flag to disable dry run mode and actually apply
|
||||||
|
the changes on the database.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup,
|
||||||
|
there will be no way to retrieve the deleted data.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The command will exclude tracks that are favorited, included in playlists or listening
|
||||||
|
history by default. If you want to include those in the pruning process as well,
|
||||||
|
add the corresponding ``--ignore-favorites``, ``--ignore-playlists`` and ``--ignore-listenings``
|
||||||
|
flags.
|
|
@ -22,6 +22,7 @@ Administration
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
django
|
django
|
||||||
|
commands
|
||||||
url
|
url
|
||||||
upgrading
|
upgrading
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue