From 30091ef69c06064ab2b6a621313b7f933e9abde8 Mon Sep 17 00:00:00 2001 From: tleb Date: Tue, 21 Apr 2020 13:50:43 +0200 Subject: [PATCH 1/4] search: ascii everywhere and unformalized whitespace --- core/search_indexes.py | 3 +++ .../search/indexes/core/user_auto.txt | 16 +++++++++++--- core/templatetags/replace.py | 22 +++++++++++++++++++ core/views/site.py | 9 +++++++- 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 core/templatetags/replace.py diff --git a/core/search_indexes.py b/core/search_indexes.py index 7bece290..b365d030 100644 --- a/core/search_indexes.py +++ b/core/search_indexes.py @@ -45,6 +45,9 @@ class UserIndex(indexes.SearchIndex, indexes.Indexable): def get_updated_field(self): return "last_update" + def prepare_auto(self, obj): + return self.prepared_data["auto"].strip() + class IndexSignalProcessor(signals.BaseSignalProcessor): def setup(self): diff --git a/core/templates/search/indexes/core/user_auto.txt b/core/templates/search/indexes/core/user_auto.txt index 4167b506..09d8005c 100644 --- a/core/templates/search/indexes/core/user_auto.txt +++ b/core/templates/search/indexes/core/user_auto.txt @@ -1,3 +1,13 @@ -{{ object.first_name }} -{{ object.last_name }} -{{ object.nick_name }} +{% load replace %} + +{% with first=object.first_name|safe|slugify last=object.last_name|safe|slugify nick=object.nick_name|default_if_none:""|safe|slugify %} + +{{ first|replace:"|-| " }} +{{ last|replace:"|-| " }} +{{ nick|replace:"|-| " }} +{{ first|cut:"-" }} +{{ last|cut:"-" }} +{{ nick|cut:"-" }} +{{ first|cut:"-" }}{{ last|cut:"-" }} + +{% endwith %} diff --git a/core/templatetags/replace.py b/core/templatetags/replace.py new file mode 100644 index 00000000..f75fa74a --- /dev/null +++ b/core/templatetags/replace.py @@ -0,0 +1,22 @@ +from django.template.exceptions import TemplateSyntaxError +from django import template +from django.template.defaultfilters import stringfilter + +register = template.Library() + + +# arg should be of the form "|foo|bar" where the first character is the +# separator between old and new in value.replace(old, new) +@register.filter +@stringfilter +def replace(value, arg): + # s.replace('', '') == s so len(arg) == 2 is fine + if len(arg) < 2: + raise TemplateSyntaxError("badly formatted argument") + + arg = arg.split(arg[0]) + + if len(arg) != 3: + raise TemplateSyntaxError("badly formatted argument") + + return value.replace(arg[1], arg[2]) diff --git a/core/views/site.py b/core/views/site.py index 468d3f32..6f1fd0ae 100644 --- a/core/views/site.py +++ b/core/views/site.py @@ -30,6 +30,7 @@ from django.contrib.auth.decorators import login_required from django.utils import html from django.views.generic import ListView, TemplateView from django.conf import settings +from django.utils.text import slugify import json @@ -73,7 +74,13 @@ def notification(request, notif_id): def search_user(query, as_json=False): try: - res = SearchQuerySet().models(User).autocomplete(auto=html.escape(query))[:20] + # slugify turns everything into ascii and every whitespace into - + # it ends by removing duplicate - (so '- - ' will turn into '-') + # replace('-', ' ') because search is whitespace based + query = slugify(query).replace("-", " ") + # is this necessary? it's not done when indexing users + query = html.escape(query) + res = SearchQuerySet().models(User).autocomplete(auto=query)[:20] return [r.object for r in res] except TypeError: return [] From a38ab57ddf66c2de0e1eefbf2bb024dad4f3e7b1 Mon Sep 17 00:00:00 2001 From: tleb Date: Tue, 21 Apr 2020 15:36:13 +0200 Subject: [PATCH 2/4] search: sort by User.last_update --- core/search_indexes.py | 1 + core/views/site.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/search_indexes.py b/core/search_indexes.py index b365d030..ce96884e 100644 --- a/core/search_indexes.py +++ b/core/search_indexes.py @@ -34,6 +34,7 @@ from forum.models import ForumMessage, ForumMessageMeta class UserIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) auto = indexes.EdgeNgramField(use_template=True) + last_update = indexes.DateTimeField(model_attr="last_update") def get_model(self): return User diff --git a/core/views/site.py b/core/views/site.py index 6f1fd0ae..9a6355a4 100644 --- a/core/views/site.py +++ b/core/views/site.py @@ -75,12 +75,17 @@ def notification(request, notif_id): def search_user(query, as_json=False): try: # slugify turns everything into ascii and every whitespace into - - # it ends by removing duplicate - (so '- - ' will turn into '-') + # it ends by removing duplicate - (so ' - ' will turn into '-') # replace('-', ' ') because search is whitespace based query = slugify(query).replace("-", " ") - # is this necessary? it's not done when indexing users + # TODO: is this necessary? query = html.escape(query) - res = SearchQuerySet().models(User).autocomplete(auto=query)[:20] + res = ( + SearchQuerySet() + .models(User) + .autocomplete(auto=query) + .order_by("-last_update")[:20] + ) return [r.object for r in res] except TypeError: return [] From 12493cffca138c92be70fef835b38f7349f4104d Mon Sep 17 00:00:00 2001 From: tleb Date: Tue, 21 Apr 2020 15:37:39 +0200 Subject: [PATCH 3/4] search: make sure we don't have indexes that are too long --- core/search_indexes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/search_indexes.py b/core/search_indexes.py index ce96884e..b98dc67b 100644 --- a/core/search_indexes.py +++ b/core/search_indexes.py @@ -47,7 +47,7 @@ class UserIndex(indexes.SearchIndex, indexes.Indexable): return "last_update" def prepare_auto(self, obj): - return self.prepared_data["auto"].strip() + return self.prepared_data["auto"].strip()[:245] class IndexSignalProcessor(signals.BaseSignalProcessor): From 6382e631b6f90b94ee8bb999ebe484691b2a59e8 Mon Sep 17 00:00:00 2001 From: tleb Date: Tue, 21 Apr 2020 16:12:29 +0200 Subject: [PATCH 4/4] search: reduce user index size --- core/templates/search/indexes/core/user_auto.txt | 8 ++++---- core/templatetags/{replace.py => search_helpers.py} | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) rename core/templatetags/{replace.py => search_helpers.py} (90%) diff --git a/core/templates/search/indexes/core/user_auto.txt b/core/templates/search/indexes/core/user_auto.txt index 09d8005c..9966df06 100644 --- a/core/templates/search/indexes/core/user_auto.txt +++ b/core/templates/search/indexes/core/user_auto.txt @@ -1,13 +1,13 @@ -{% load replace %} +{% load search_helpers %} {% with first=object.first_name|safe|slugify last=object.last_name|safe|slugify nick=object.nick_name|default_if_none:""|safe|slugify %} {{ first|replace:"|-| " }} {{ last|replace:"|-| " }} {{ nick|replace:"|-| " }} -{{ first|cut:"-" }} -{{ last|cut:"-" }} -{{ nick|cut:"-" }} +{% if first|count:"-" != 0 %}{{ first|cut:"-" }}{% endif %} +{% if last|count:"-" != 0 %}{{ last|cut:"-" }}{% endif %} +{% if nick|count:"-" != 0 %}{{ nick|cut:"-" }}{% endif %} {{ first|cut:"-" }}{{ last|cut:"-" }} {% endwith %} diff --git a/core/templatetags/replace.py b/core/templatetags/search_helpers.py similarity index 90% rename from core/templatetags/replace.py rename to core/templatetags/search_helpers.py index f75fa74a..537ff357 100644 --- a/core/templatetags/replace.py +++ b/core/templatetags/search_helpers.py @@ -20,3 +20,8 @@ def replace(value, arg): raise TemplateSyntaxError("badly formatted argument") return value.replace(arg[1], arg[2]) + + +@register.filter +def count(value, arg): + return value.count(arg)