mirror of
https://github.com/ae-utbm/sith.git
synced 2024-12-26 09:41:13 +00:00
Merge branch 'sexy-search' into 'master'
Sexy search The goal of this MR is to solve the search issue #96. Let's assume we have a user with firstname `Jean-François`, lastname `Du Pont` and nickname `Ai'gnan`. Here is a list of searches that previously did not return him but now do (the search was, and still is, case-insensitive): * `jean françois` (missing -) ; * `jean-francois` (missing ç) ; * `jean francois` (both) ; * `dupont` (missing space) ; * `françois` (not the start of his name) ; * `aignan` (missing '). You get it, there are a lot of mistakes that humans can make. It also sorts results by `User.last_update` to avoid putting old accounts at the top of common requests (such as firstname-only or lastname-only requests). ### How it works For those who don't know, the search is handled by Xapian (the search backend) through the haystack library, which provides a Django-friendly interface to multiple search backends. Xapian maintains a kind of duplicate of the database (only for the models we want to search against), which is optimised for search operations. Its "models" are called "indexes" (see `core.search_indexes.UserIndex` for the user model). Every time a user is created or modified, it is indexed (through a signal handler) so that Xapian knows about it. For the user search, what is indexed is the string output by the `core/templates/search/indexes/core/user_auto.txt` template. For our example from above, it looks like this: ``` jean francois du pont aignan jeanfrancois dupont jeanfrancoisdupont ``` As you can see, non-ASCII characters are converted to ASCII. There are also near-duplicates with different spacing, because we are using an autocomplete algorithm: it searches from the beginning of words. The one I am not sure about is the last one. Its goal is to allow searching without putting a space between the firstname and lastname. Is this useful? Production will have to run `./manage.py update_index`; I am not sure whether the upgrade script already does it. See merge request ae/Sith!269
This commit is contained in:
commit
782cd9a45a
@ -34,6 +34,7 @@ from forum.models import ForumMessage, ForumMessageMeta
|
||||
class UserIndex(indexes.SearchIndex, indexes.Indexable):
|
||||
text = indexes.CharField(document=True, use_template=True)
|
||||
auto = indexes.EdgeNgramField(use_template=True)
|
||||
last_update = indexes.DateTimeField(model_attr="last_update")
|
||||
|
||||
def get_model(self):
    """Return ``User``, the model this search index is declared for."""
    model = User
    return model
|
||||
@ -45,6 +46,9 @@ class UserIndex(indexes.SearchIndex, indexes.Indexable):
|
||||
def get_updated_field(self):
    """Return the name of the field holding the last modification date."""
    updated_field = "last_update"
    return updated_field
|
||||
|
||||
def prepare_auto(self, obj):
    """Return the prepared ``auto`` text, stripped and cut to 245 characters.

    ``obj`` is not used; the value is read from ``self.prepared_data``.
    """
    rendered = self.prepared_data["auto"]
    stripped = rendered.strip()
    # NOTE(review): 245 presumably mirrors the search backend's maximum term
    # length -- confirm before changing it.
    return stripped[:245]
|
||||
|
||||
|
||||
class IndexSignalProcessor(signals.BaseSignalProcessor):
|
||||
def setup(self):
|
||||
|
@ -1,3 +1,13 @@
|
||||
{{ object.first_name }}
|
||||
{{ object.last_name }}
|
||||
{{ object.nick_name }}
|
||||
{% load search_helpers %}
|
||||
|
||||
{% with first=object.first_name|safe|slugify last=object.last_name|safe|slugify nick=object.nick_name|default_if_none:""|safe|slugify %}
|
||||
|
||||
{{ first|replace:"|-| " }}
|
||||
{{ last|replace:"|-| " }}
|
||||
{{ nick|replace:"|-| " }}
|
||||
{% if first|count:"-" != 0 %}{{ first|cut:"-" }}{% endif %}
|
||||
{% if last|count:"-" != 0 %}{{ last|cut:"-" }}{% endif %}
|
||||
{% if nick|count:"-" != 0 %}{{ nick|cut:"-" }}{% endif %}
|
||||
{{ first|cut:"-" }}{{ last|cut:"-" }}
|
||||
|
||||
{% endwith %}
|
||||
|
27
core/templatetags/search_helpers.py
Normal file
27
core/templatetags/search_helpers.py
Normal file
@ -0,0 +1,27 @@
|
||||
from django.template.exceptions import TemplateSyntaxError
|
||||
from django import template
|
||||
from django.template.defaultfilters import stringfilter
|
||||
|
||||
register = template.Library()
|
||||
|
||||
|
||||
@register.filter
@stringfilter
def replace(value, arg):
    """Template filter wrapping ``value.replace(old, new)``.

    ``arg`` must have the form ``"|foo|bar"``: its first character is the
    separator, followed by ``old``, the separator again, then ``new``.
    ``old`` and ``new`` may both be empty (``"||"``), in which case ``value``
    is returned unchanged because ``s.replace('', '') == s``.

    Raises ``TemplateSyntaxError`` when ``arg`` is shorter than two
    characters or does not contain its separator exactly twice.
    """
    # Splitting on the separator yields exactly three parts if and only if the
    # separator occurs exactly twice; the length test runs first so that
    # ``arg[0]`` is never evaluated on an empty string.
    if len(arg) < 2 or arg.count(arg[0]) != 2:
        raise TemplateSyntaxError("badly formatted argument")

    _, old, new = arg.split(arg[0])
    return value.replace(old, new)
|
||||
|
||||
|
||||
@register.filter
def count(value, arg):
    """Template filter returning ``value.count(arg)``.

    ``value`` is used as given (no ``stringfilter`` coercion is applied).
    """
    occurrences = value.count(arg)
    return occurrences
|
@ -30,6 +30,7 @@ from django.contrib.auth.decorators import login_required
|
||||
from django.utils import html
|
||||
from django.views.generic import ListView, TemplateView
|
||||
from django.conf import settings
|
||||
from django.utils.text import slugify
|
||||
|
||||
import json
|
||||
|
||||
@ -73,7 +74,18 @@ def notification(request, notif_id):
|
||||
|
||||
def search_user(query, as_json=False):
    """Return up to 20 users matching ``query``, most recently updated first.

    The query is normalised before being sent to the search backend, and
    results are ordered by descending ``last_update``.

    :param query: raw search string.
    :param as_json: not used in this function body; kept so existing callers
        that pass it keep working.
    :return: a list with the ``object`` of each search result, or an empty
        list when the search raises ``TypeError``.
    """
    try:
        # slugify turns everything into ASCII and every whitespace into "-",
        # collapsing duplicates on the way (so " - " becomes "-"). The dashes
        # are turned back into spaces because the search is whitespace based.
        normalized = slugify(query).replace("-", " ")
        # NOTE(review): slugify's output should not contain any character that
        # html.escape rewrites, so this is presumably a no-op -- confirm
        # before removing it.
        normalized = html.escape(normalized)
        # Single backend query: the former search on the raw, un-normalised
        # query was discarded immediately and has been dropped.
        results = (
            SearchQuerySet()
            .models(User)
            .autocomplete(auto=normalized)
            .order_by("-last_update")[:20]
        )
        return [r.object for r in results]
    except TypeError:
        # NOTE(review): presumably raised by the search library when the
        # query contains no usable word -- confirm against haystack.
        return []
|
||||
|
Loading…
Reference in New Issue
Block a user