Fix #994: use PostgreSQL full-text-search

This commit is contained in:
Eliot Berriot 2019-12-16 17:46:32 +01:00
parent 8f0eabcb71
commit b3d8d6a4da
No known key found for this signature in database
GPG Key ID: 6B501DFD73514E14
6 changed files with 148 additions and 5 deletions

View File

@ -928,3 +928,4 @@ MODERATION_EMAIL_NOTIFICATIONS_ENABLED = env.bool(
# Delay in days after signup before we show the "support us" messages # Delay in days after signup before we show the "support us" messages
INSTANCE_SUPPORT_MESSAGE_DELAY = env.int("INSTANCE_SUPPORT_MESSAGE_DELAY", default=15) INSTANCE_SUPPORT_MESSAGE_DELAY = env.int("INSTANCE_SUPPORT_MESSAGE_DELAY", default=15)
FUNKWHALE_SUPPORT_MESSAGE_DELAY = env.int("FUNKWHALE_SUPPORT_MESSAGE_DELAY", default=15) FUNKWHALE_SUPPORT_MESSAGE_DELAY = env.int("FUNKWHALE_SUPPORT_MESSAGE_DELAY", default=15)
# Feature flag, disabled by default: when true, the search API views build
# their filter with get_fts_query() instead of get_query().
USE_FULL_TEXT_SEARCH = env.bool("USE_FULL_TEXT_SEARCH", default=False)

View File

@ -1,5 +1,6 @@
import re import re
from django.contrib.postgres.search import SearchQuery
from django.db.models import Q from django.db.models import Q
@ -56,6 +57,17 @@ def get_query(query_string, search_fields):
return query return query
def get_fts_query(query_string):
    """
    Build a ``Q`` filter matching ``body_text`` against a full-text query.

    A query wrapped in double quotes is handed to ``SearchQuery`` unchanged
    (``search_type="raw"``), so its author is responsible for its syntax.
    Any other query is split on whitespace, each term is stripped of the
    characters that carry a meaning in tsquery syntax, and the remaining
    terms become prefix matches (``term:*``) joined with ``&`` so that all
    of them must match.

    :param query_string: search string, typically coming from user input
    :returns: a ``Q`` object; ``Q(pk=None)`` when no usable term remains
    """
    # tsquery operators, weight/prefix markers and quoting characters. Left
    # inside a term they would either change the meaning of the raw query or
    # make PostgreSQL reject it with a syntax error.
    reserved = str.maketrans("", "", "\"'&|!():<>*\\")

    # Only a string that both starts AND ends with a double quote counts as
    # quoted; a lone '"' satisfies both tests but is not a quoted query.
    is_quoted = (
        len(query_string) > 1
        and query_string.startswith('"')
        and query_string.endswith('"')
    )
    if not is_quoted:
        # split() without argument also copes with tabs, newlines and runs
        # of spaces, none of which may end up inside a raw tsquery term.
        terms = (term.translate(reserved) for term in query_string.split())
        parts = ["{}:*".format(term) for term in terms if term]
        if not parts:
            return Q(pk=None)

        query_string = "&".join(parts)

    return Q(body_text=SearchQuery(query_string, search_type="raw"))
def filter_tokens(tokens, valid):
    """
    Keep only the tokens whose ``"key"`` entry is contained in ``valid``.

    :param tokens: iterable of mappings, each providing a ``"key"`` entry
    :param valid: container of accepted keys
    :returns: a new list with the accepted tokens, in their original order
    """
    accepted = []
    for token in tokens:
        if token["key"] in valid:
            accepted.append(token)
    return accepted

View File

@ -0,0 +1,109 @@
# Generated by Django 2.2.7 on 2019-12-16 15:06
import django.contrib.postgres.search
import django.contrib.postgres.indexes
from django.db import migrations, models
import django.db.models.deletion
from django.db import connection
# Full-text search configuration, keyed by "app_label.ModelName":
# - "fields": columns used to build the model's body_text search vector
# - "trigger_name": name of the database trigger created on the model's table
FIELDS = {
    "music.Artist": {
        "fields": ["name"],
        "trigger_name": "music_artist_update_body_text",
    },
    "music.Track": {
        "fields": ["title", "copyright"],
        "trigger_name": "music_track_update_body_text",
    },
    "music.Album": {
        "fields": ["title"],
        "trigger_name": "music_album_update_body_text",
    },
}
def populate_body_text(apps, schema_editor):
    """
    Fill ``body_text`` on the existing rows of every model listed in FIELDS.

    For each entry, a ``SearchVector`` over the configured fields is written
    to all rows of the model with a single queryset ``update()``.

    :param apps: historical app registry handed over by ``RunPython``
    :param schema_editor: schema editor handed over by ``RunPython`` (unused)
    """
    # NOTE(review): the operations list installs the BEFORE INSERT OR UPDATE
    # triggers before this step, so they presumably fire for each row updated
    # here as well -- confirm.
    build_vector = django.contrib.postgres.search.SearchVector
    for label, search_config in FIELDS.items():
        model = apps.get_model(*label.split("."))
        print('Populating search index for {}'.format(model.__name__))
        model.objects.update(body_text=build_vector(*search_config["fields"]))
def rewind(apps, schema_editor):
    """
    Reverse step paired with ``populate_body_text``: deliberately a no-op.

    :param apps: historical app registry handed over by ``RunPython`` (unused)
    :param schema_editor: schema editor handed over by ``RunPython`` (unused)
    """
    return None
def setup_triggers(apps, schema_editor):
    """
    Create one row-level trigger per model listed in FIELDS.

    Each trigger runs before INSERT or UPDATE on the model's table and calls
    PostgreSQL's ``tsvector_update_trigger`` to rebuild ``body_text`` from the
    configured columns, using the ``pg_catalog.english`` configuration.

    :param apps: historical app registry handed over by ``RunPython``
    :param schema_editor: schema editor handed over by ``RunPython``; its
        connection is the one the statements are executed on
    """
    # Run on the connection being migrated (not the global default one) and
    # let the context manager close the cursor once every trigger exists.
    with schema_editor.connection.cursor() as cursor:
        for label, search_config in FIELDS.items():
            model = apps.get_model(*label.split('.'))
            table = model._meta.db_table
            print('Creating database trigger {} on {}'.format(search_config['trigger_name'], table))
            # Identifiers come from the FIELDS constant and from model
            # metadata, never from user input, hence the plain formatting.
            sql = """
                CREATE TRIGGER {trigger_name}
                BEFORE INSERT OR UPDATE
                ON {table}
                FOR EACH ROW
                EXECUTE PROCEDURE
                tsvector_update_trigger(body_text, 'pg_catalog.english', {fields})
            """.format(
                trigger_name=search_config['trigger_name'],
                table=table,
                fields=', '.join(search_config['fields']),
            )
            cursor.execute(sql)
def rewind_triggers(apps, schema_editor):
    """
    Drop the triggers created by ``setup_triggers`` (reverse migration step).

    ``DROP TRIGGER IF EXISTS`` is used, so a trigger that is already missing
    does not make the rollback fail.

    :param apps: historical app registry handed over by ``RunPython``
    :param schema_editor: schema editor handed over by ``RunPython``; its
        connection is the one the statements are executed on
    """
    # Run on the connection being migrated (not the global default one) and
    # let the context manager close the cursor once every trigger is gone.
    with schema_editor.connection.cursor() as cursor:
        for label, search_config in FIELDS.items():
            model = apps.get_model(*label.split('.'))
            table = model._meta.db_table
            print('Dropping database trigger {} on {}'.format(search_config['trigger_name'], table))
            sql = """
                DROP TRIGGER IF EXISTS {trigger_name} ON {table}
            """.format(
                trigger_name=search_config['trigger_name'],
                table=table,
            )
            cursor.execute(sql)
class Migration(migrations.Migration):
    """
    Add a ``body_text`` search vector column and a GIN index on it to the
    album, artist and track models, then install the update triggers and
    fill the column for existing rows.
    """

    dependencies = [
        ('music', '0043_album_cover_attachment'),
    ]

    operations = [
        # 1. One body_text column per searchable model.
        *[
            migrations.AddField(
                model_name=model_name,
                name='body_text',
                field=django.contrib.postgres.search.SearchVectorField(blank=True),
            )
            for model_name in ('album', 'artist', 'track')
        ],
        # 2. One GIN index per column; index names are fixed explicitly.
        *[
            migrations.AddIndex(
                model_name=model_name,
                index=django.contrib.postgres.indexes.GinIndex(fields=['body_text'], name=index_name),
            )
            for model_name, index_name in (
                ('album', 'music_album_body_te_0ec97a_gin'),
                ('artist', 'music_artis_body_te_5c408d_gin'),
                ('track', 'music_track_body_te_da0a66_gin'),
            )
        ],
        # 3. Triggers first, then the backfill of existing rows.
        migrations.RunPython(setup_triggers, rewind_triggers),
        migrations.RunPython(populate_body_text, rewind),
    ]

View File

@ -11,6 +11,8 @@ import pydub
from django.conf import settings from django.conf import settings
from django.contrib.contenttypes.fields import GenericRelation from django.contrib.contenttypes.fields import GenericRelation
from django.contrib.postgres.fields import JSONField from django.contrib.postgres.fields import JSONField
from django.contrib.postgres.search import SearchVectorField
from django.contrib.postgres.indexes import GinIndex
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.core.serializers.json import DjangoJSONEncoder from django.core.serializers.json import DjangoJSONEncoder
@ -19,7 +21,6 @@ from django.db.models.signals import post_save, pre_save
from django.dispatch import receiver from django.dispatch import receiver
from django.urls import reverse from django.urls import reverse
from django.utils import timezone from django.utils import timezone
from versatileimagefield.fields import VersatileImageField from versatileimagefield.fields import VersatileImageField
from funkwhale_api import musicbrainz from funkwhale_api import musicbrainz
@ -56,10 +57,14 @@ class APIModelMixin(models.Model):
api_includes = [] api_includes = []
creation_date = models.DateTimeField(default=timezone.now, db_index=True) creation_date = models.DateTimeField(default=timezone.now, db_index=True)
import_hooks = [] import_hooks = []
body_text = SearchVectorField(blank=True)
class Meta: class Meta:
abstract = True abstract = True
ordering = ["-creation_date"] ordering = ["-creation_date"]
indexes = [
GinIndex(fields=["body_text"]),
]
@classmethod @classmethod
def get_or_create_from_api(cls, mbid): def get_or_create_from_api(cls, mbid):
@ -524,6 +529,9 @@ class Track(APIModelMixin):
class Meta: class Meta:
ordering = ["album", "disc_number", "position"] ordering = ["album", "disc_number", "position"]
indexes = [
GinIndex(fields=["body_text"]),
]
def __str__(self): def __str__(self):
return self.title return self.title

View File

@ -4,7 +4,11 @@ import magic
import mutagen import mutagen
import pydub import pydub
from funkwhale_api.common.search import normalize_query, get_query # noqa from funkwhale_api.common.search import (
normalize_query,
get_query,
get_fts_query,
) # noqa
def guess_mimetype(f): def guess_mimetype(f):

View File

@ -629,7 +629,10 @@ class Search(views.APIView):
"album__title__unaccent", "album__title__unaccent",
"artist__name__unaccent", "artist__name__unaccent",
] ]
query_obj = utils.get_query(query, search_fields) if settings.USE_FULL_TEXT_SEARCH:
query_obj = utils.get_fts_query(query)
else:
query_obj = utils.get_query(query, search_fields)
qs = ( qs = (
models.Track.objects.all() models.Track.objects.all()
.filter(query_obj) .filter(query_obj)
@ -639,7 +642,10 @@ class Search(views.APIView):
def get_albums(self, query): def get_albums(self, query):
search_fields = ["mbid", "title__unaccent", "artist__name__unaccent"] search_fields = ["mbid", "title__unaccent", "artist__name__unaccent"]
query_obj = utils.get_query(query, search_fields) if settings.USE_FULL_TEXT_SEARCH:
query_obj = utils.get_fts_query(query)
else:
query_obj = utils.get_query(query, search_fields)
qs = ( qs = (
models.Album.objects.all() models.Album.objects.all()
.filter(query_obj) .filter(query_obj)
@ -649,7 +655,10 @@ class Search(views.APIView):
def get_artists(self, query):
    """
    Return the artists matching ``query``, limited to ``self.max_results``.

    The filter is built with ``utils.get_fts_query`` when
    ``settings.USE_FULL_TEXT_SEARCH`` is enabled, and otherwise with
    ``utils.get_query`` over the ``mbid`` and unaccented ``name`` lookups.
    """
    if settings.USE_FULL_TEXT_SEARCH:
        query_obj = utils.get_fts_query(query)
    else:
        query_obj = utils.get_query(query, ["mbid", "name__unaccent"])

    matching = models.Artist.objects.all().filter(query_obj).with_albums()
    ordered = common_utils.order_for_search(matching, "name")
    return ordered[: self.max_results]