diff --git a/api/funkwhale_api/music/management/commands/prune_skipped_uploads.py b/api/funkwhale_api/music/management/commands/prune_skipped_uploads.py
new file mode 100644
index 000000000..8dcf75380
--- /dev/null
+++ b/api/funkwhale_api/music/management/commands/prune_skipped_uploads.py
@@ -0,0 +1,40 @@
+from django.core.management.base import BaseCommand
+
+from django.db import transaction
+
+from funkwhale_api.music import models
+
+
+class Command(BaseCommand):
+    help = """
+    This command makes it easy to prune all skipped Uploads from the database.
+    Due to a bug they might have caused the database to grow exponentially,
+    especially when using in-place-imports on a regular basis. This command
+    helps to clean up the database again.
+    """
+
+    def add_arguments(self, parser):
+        """Register the --force switch; without it the command is a dry run."""
+        parser.add_argument(
+            "--force",
+            # A plain flag: without store_true argparse would demand a value
+            # and any non-empty string (even "false") would count as truthy.
+            action="store_true",
+            help="Disable dry run mode and apply pruning for real on the database",
+        )
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        """Report how many uploads are skipped; delete them if --force is set."""
+        skipped = models.Upload.objects.filter(import_status="skipped")
+        # Count in the database instead of loading every row via len()
+        count = skipped.count()
+        if options["force"]:
+            skipped.delete()
+            self.stdout.write(f"Deleted {count} entries from the database.")
+            return
+
+        self.stdout.write(
+            f"Would delete {count} entries from the database. "
+            "Run with --force to actually apply changes to the database"
+        )
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index b01f68e5f..3fef2c253 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -264,7 +264,9 @@ def process_upload(upload, update_denormalization=True):
         upload.import_status = "skipped"
         upload.import_details = {
             "code": "already_imported_in_owned_libraries",
-            "duplicates": list(owned_duplicates),
+            # In order to avoid exponential growth of the database, we only
+            # reference the first known upload which gets duplicated
+            "duplicates": owned_duplicates[0],
         }
         upload.import_date = timezone.now()
         upload.save(
@@ -415,6 +417,7 @@ def get_owned_duplicates(upload, track):
         )
         .exclude(pk=upload.pk)
         .values_list("uuid", flat=True)
+        .order_by("creation_date")
     )
 
 
diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py
index 414bf95e7..6be18a35b 100644
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@@ -452,7 +452,7 @@ def test_upload_import_skip_existing_track_in_own_library(factories, temp_signal
     assert duplicate.import_status == "skipped"
     assert duplicate.import_details == {
         "code": "already_imported_in_owned_libraries",
-        "duplicates": [str(existing.uuid)],
+        "duplicates": str(existing.uuid),
     }
 
     handler.assert_called_once_with(
diff --git a/changes/changelog.d/1676.bugfix b/changes/changelog.d/1676.bugfix
new file mode 100644
index 000000000..4e77e2ecd
--- /dev/null
+++ b/changes/changelog.d/1676.bugfix
@@ -0,0 +1 @@
+Fix exponentially growing database when using in-place-imports on a regular basis #1676