diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index fe1e2b0c9..fc84922a3 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -5,6 +5,7 @@ import os
 import tempfile
 import urllib.parse
 import uuid
+from random import randint, sample
 
 import arrow
 import slugify
@@ -16,7 +17,7 @@ from django.core.exceptions import ObjectDoesNotExist
 from django.core.files.base import ContentFile
 from django.core.serializers.json import DjangoJSONEncoder
 from django.db import models, transaction
-from django.db.models import Count, JSONField, Prefetch
+from django.db.models import Count, JSONField, Max, Min, Prefetch
 from django.db.models.expressions import OuterRef, Subquery
 from django.db.models.query_utils import Q
 from django.db.models.signals import post_save, pre_save
@@ -542,6 +543,56 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
         """
         return self.order_by("disc_number", "position", "title")
 
+    def random(self, batch_size):
+        """
+        Return up to ``batch_size`` randomly picked rows of this queryset.
+
+        Random ids are drawn between the smallest and largest id and looked
+        up by primary key, which avoids sorting the whole table with
+        ``ORDER BY RANDOM()``. When that does not yield enough rows (ids are
+        sparse, or the queryset is small), the ids are picked by the database
+        instead so that existing rows are still returned.
+
+        The result is an unsliced queryset, so it can still be filtered.
+        """
+        if batch_size <= 0:
+            return self.none()
+
+        bounds = self.aggregate(min_id=Min("id"), max_id=Max("id"))
+        min_id, max_id = bounds["min_id"], bounds["max_id"]
+
+        if min_id is None or max_id is None:
+            return self.none()
+
+        max_tries = 10
+        guesses_per_try = batch_size * 2
+        found_ids = set()
+
+        # Guessing ids only pays off when the id range is wider than one
+        # round of guesses; smaller querysets go straight to the fallback.
+        if max_id - min_id >= guesses_per_try:
+            for _ in range(max_tries):
+                candidate_ids = [
+                    randint(min_id, max_id) for _ in range(guesses_per_try)
+                ]
+                found_ids.update(
+                    self.filter(id__in=candidate_ids).values_list("id", flat=True)
+                )
+                if len(found_ids) >= batch_size:
+                    break
+
+        if len(found_ids) < batch_size:
+            # Too few hits: let the database pick, otherwise sparse querysets
+            # would come back (almost) empty even though rows exist.
+            found_ids = set(
+                self.order_by("?").values_list("id", flat=True)[:batch_size]
+            )
+
+        # sample() keeps the selection uniform; truncating the set would
+        # always favour the ids that come first in its iteration order.
+        picked_ids = sample(list(found_ids), min(batch_size, len(found_ids)))
+        return self.filter(id__in=picked_ids).order_by("?")
+
 
 def get_artist(release_list):
     return Artist.get_or_create_from_api(
diff --git a/api/funkwhale_api/radios/radios_v2.py b/api/funkwhale_api/radios/radios_v2.py
index d13d1eb82..7785f70d0 100644
--- a/api/funkwhale_api/radios/radios_v2.py
+++ b/api/funkwhale_api/radios/radios_v2.py
@@ -109,7 +109,7 @@ class SessionRadio(SimpleRadio):
         queryset = self.filter_queryset(queryset)
 
         # select a random batch of the qs
-        sliced_queryset = queryset.order_by("?")[:BATCH_SIZE]
+        sliced_queryset = queryset.random(BATCH_SIZE)
         if len(sliced_queryset) <= 0 and not cached_evaluated_radio_tracks:
             raise ValueError("No more radio candidates")
 
@@ -166,7 +166,7 @@ class SessionRadio(SimpleRadio):
 class RandomRadio(SessionRadio):
     def get_queryset(self, **kwargs):
         qs = super().get_queryset(**kwargs)
-        return qs.filter(artist_credit__artist__content_category="music").order_by("?")
+        return qs.filter(artist_credit__artist__content_category="music").random(100)
 
 
 @registry.register(name="random_library")
@@ -179,7 +179,7 @@ class RandomLibraryRadio(SessionRadio):
         query = Q(artist_credit__artist__content_category="music") & Q(
             pk__in=tracks_ids
         )
-        return qs.filter(query).order_by("?")
+        return qs.filter(query).random(100)
 
 
 @registry.register(name="favorites")
@@ -390,7 +390,7 @@ class LessListenedRadio(SessionRadio):
         return (
             qs.filter(artist_credit__artist__content_category="music")
             .exclude(pk__in=listened)
-            .order_by("?")
+            .random(100)
         )
 
 
@@ -411,7 +411,7 @@ class LessListenedLibraryRadio(SessionRadio):
         query = Q(artist_credit__artist__content_category="music") & Q(
             pk__in=tracks_ids
         )
-        return qs.filter(query).exclude(pk__in=listened).order_by("?")
+        return qs.filter(query).exclude(pk__in=listened).random(100)
 
 
 @registry.register(name="actor-content")
diff --git a/changes/changelog.d/2450.enhancement b/changes/changelog.d/2450.enhancement
new file mode 100644
index 000000000..519c9a17b
--- /dev/null
+++ b/changes/changelog.d/2450.enhancement
@@ -0,0 +1 @@
+Optimize radios queryset to support large tables (#2450)