Skip to content

Commit 4e1e056

Browse files
committed
update routes search + logic index
1 parent 8e00097 commit 4e1e056

13 files changed

Lines changed: 329 additions & 137 deletions

File tree

api/admin/views.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
44
Model views for the admin interface.
55
"""
6+
from unidecode import unidecode
67

78
from flask import (url_for,
89
jsonify,
@@ -124,6 +125,10 @@ def is_accessible(self):
124125
return True
125126

126127

128+
129+
130+
131+
127132
class UserView(ModelView):
128133
edit_template = 'admin/edit.user.html'
129134
create_template = 'admin/edit.user.html'
@@ -571,6 +576,41 @@ def render(self, template, **kwargs):
571576
return super(PrinterView, self).render(template, **kwargs)
572577
return super(PrinterView, self).render(template, **kwargs)
573578

579+
def get_list(self, page, sort_field, sort_desc, search, filters, page_size=None):
    """Return ``(count, rows)`` for the list view with accent-insensitive search.

    Every row of ``self.model`` is loaded, then filtered, sorted and
    paginated in Python rather than in SQL.

    :param page: zero-based page index; ``None`` is treated as the first page.
    :param sort_field: name of the attribute to sort on; a falsy value keeps
        the order returned by the query.
    :param sort_desc: sort in descending order when truthy.
    :param search: free-text search. Each whitespace-separated token must
        occur in ``"<lastname> <firstnames>"`` once both sides have been
        transliterated with ``unidecode`` and lowercased.
    :param filters: accepted for signature compatibility; not applied here.
    :param page_size: number of rows per page; a falsy value disables
        pagination.
    :return: ``(count, rows)`` where ``count`` is the number of matching rows
        before pagination and ``rows`` is the requested page.
    """
    # NOTE(review): `filters` is ignored and `page_size=None` returns every
    # row; confirm this matches what the admin framework expects.
    rows = self.session.query(self.model).all()

    if search:
        tokens = unidecode(search).lower().split()

        def matches(row):
            haystack = unidecode(f"{row.lastname or ''} {row.firstnames or ''}").lower()
            return all(token in haystack for token in tokens)

        rows = [row for row in rows if matches(row)]

    count = len(rows)

    if sort_field:
        def sort_key(row):
            value = getattr(row, sort_field, None)
            # Missing, None and empty values sort first, as '' did before.
            if value is None or value == "":
                return (0, "")
            # Only strings are case-folded; numbers, dates, etc. are compared
            # as-is instead of crashing on a missing ``.lower()``.
            if isinstance(value, str):
                return (1, value.lower())
            return (1, value)

        # ``bool()`` because ``sort_desc`` may be None, which ``list.sort``
        # rejects on older Python versions.
        rows.sort(key=sort_key, reverse=bool(sort_desc))

    if page_size:
        start = (page or 0) * page_size
        rows = rows[start:start + page_size]

    return count, rows
613+
574614
# Expose custom routes for printer view
575615
# for Ajax requests
576616

@@ -1101,6 +1141,41 @@ def on_model_change(self, form, model, is_created):
11011141
model.last_editor = current_user.username
11021142
session.commit()
11031143

1144+
def get_list(self, page, sort_field, sort_desc, search, filters, page_size=None):
    """Return ``(count, rows)`` for the list view with accent-insensitive search.

    Every row of ``self.model`` is loaded, then filtered, sorted and
    paginated in Python rather than in SQL.

    :param page: zero-based page index; ``None`` is treated as the first page.
    :param sort_field: name of the attribute to sort on; a falsy value keeps
        the order returned by the query.
    :param sort_desc: sort in descending order when truthy.
    :param search: free-text search. Each whitespace-separated token must
        occur in the row's ``label`` once both sides have been transliterated
        with ``unidecode`` and lowercased.
    :param filters: accepted for signature compatibility; not applied here.
    :param page_size: number of rows per page; a falsy value disables
        pagination.
    :return: ``(count, rows)`` where ``count`` is the number of matching rows
        before pagination and ``rows`` is the requested page.
    """
    # NOTE(review): `filters` is ignored and `page_size=None` returns every
    # row; confirm this matches what the admin framework expects.
    rows = self.session.query(self.model).all()

    if search:
        tokens = unidecode(search).lower().split()

        def matches(row):
            haystack = unidecode(f"{row.label or ''}").lower()
            return all(token in haystack for token in tokens)

        rows = [row for row in rows if matches(row)]

    count = len(rows)

    if sort_field:
        def sort_key(row):
            value = getattr(row, sort_field, None)
            # Missing, None and empty values sort first, as '' did before.
            if value is None or value == "":
                return (0, "")
            # Only strings are case-folded; numbers, dates, etc. are compared
            # as-is instead of crashing on a missing ``.lower()``.
            if isinstance(value, str):
                return (1, value.lower())
            return (1, value)

        # ``bool()`` because ``sort_desc`` may be None, which ``list.sort``
        # rejects on older Python versions.
        rows.sort(key=sort_key, reverse=bool(sort_desc))

    if page_size:
        start = (page or 0) * page_size
        rows = rows[start:start + page_size]

    return count, rows
1178+
11041179

11051180
class AboutView(BaseView):
11061181
"""Custom view for database documentation."""

api/admin/views_dir/utils.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,14 @@
44
"""
55
import os
66
from werkzeug.utils import secure_filename
7+
from sqlalchemy import func
8+
from unidecode import unidecode
9+
import unicodedata
710

811
def prefix_name(_, file_data):
912
parts = os.path.splitext(file_data.filename)
10-
return secure_filename('file-%s%s' % parts)
13+
return secure_filename('file-%s%s' % parts)
14+
15+
def get_search_filter(column, search, dialect_name):
    """Build a case-insensitive "contains" filter on ``column`` for ``search``.

    The search text is transliterated with ``unidecode`` and lowercased, then
    matched as a substring against ``lower(column)``.

    :param column: SQLAlchemy column expression to search in.
    :param search: raw text typed by the user.
    :param dialect_name: unused; kept so existing callers keep working.
    :return: a SQLAlchemy ``LIKE`` expression usable in ``filter()``.
    """
    # NOTE(review): only the search text is transliterated; the column is just
    # lowercased, so accented stored values match only if the database
    # collation is accent-insensitive. Confirm against the target database.
    normalized_search = unidecode(search).lower()
    # Escape LIKE metacharacters so user-typed '%' and '_' are matched
    # literally instead of acting as wildcards.
    escaped = (
        normalized_search
        .replace("/", "//")
        .replace("%", "/%")
        .replace("_", "/_")
    )
    return func.lower(column).like(f"%{escaped}%", escape="/")

api/index_fts/index_utils.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
"""
55
from shutil import rmtree
66
import bleach
7-
7+
from unidecode import unidecode
8+
from tqdm import tqdm
89
from whoosh import index
10+
from joblib import Parallel, delayed
911

1012

1113
def create_store(store, path) -> None:
@@ -37,19 +39,46 @@ def prepare_content(obj):
3739
return clean_text
3840

3941

40-
def populate_index(session, index_, model):
42+
def extract_data(printer):
    """Collect the values to index for ``printer`` into a plain dict.

    The personal and professional information fields and every patent's
    ``references`` are joined with spaces and passed through ``bleach.clean``
    with no allowed tags or attributes and ``strip=True``.

    :param printer: record exposing ``_id_dil``, ``lastname``, ``firstnames``,
        ``personal_information``, ``professional_information`` and ``patents``.
    :return: dict with keys ``id_dil``, ``lastname``, ``firstnames``,
        ``clean_text`` and ``text``. Names and ``clean_text`` are
        transliterated with ``unidecode`` and lowercased; ``text`` is the
        cleaned text as returned by ``bleach.clean``.
    """
    # Presumably the point is to copy everything needed out of the ORM object
    # so later steps do not need a SQLAlchemy session -- TODO confirm.
    fragments = [
        printer.personal_information or "",
        printer.professional_information or "",
    ]
    fragments.extend(patent.references or "" for patent in printer.patents)

    text = bleach.clean(" ".join(fragments), tags=[], attributes=[], strip=True)

    return {
        "id_dil": str(printer._id_dil),
        "lastname": unidecode(printer.lastname or "").lower(),
        "firstnames": unidecode(printer.firstnames or "").lower(),
        "clean_text": unidecode(text).lower(),
        "text": text,
    }
61+
62+
def populate_index(session, index_, model, n_jobs=-1):
    """Index every ``model`` row found through ``session`` into ``index_``.

    Runs in three stages, each wrapped in a ``tqdm`` progress bar:
    rows are serialized with ``extract_data``, turned into index documents
    through ``joblib.Parallel``, then written with a single index writer.

    :param session: SQLAlchemy session used to query ``model``.
    :param index_: index object providing ``writer()``.
    :param model: mapped class whose rows are indexed.
    :param n_jobs: forwarded to ``joblib.Parallel``.
    :return: the ``index_`` object that was passed in.
    """
    records = session.query(model).all()

    payloads = []
    for record in tqdm(records, desc="Serializing data for index"):
        payloads.append(extract_data(record))

    def to_document(payload):
        # Comma-separated first names become space-separated before being
        # combined with the last name.
        given_names = " ".join(payload["firstnames"].split(","))
        return {
            "id_dil": payload["id_dil"],
            "lastname": payload["lastname"],
            "firstnames": payload["firstnames"],
            "firstnames_lastname": f"{given_names} {payload['lastname']}",
            "content": payload["text"],
            "content_ngram": payload["clean_text"],
        }

    tasks = (
        delayed(to_document)(payload)
        for payload in tqdm(payloads, desc="Prepare documents for index")
    )
    documents = Parallel(n_jobs=n_jobs)(tasks)

    # Used as a context manager, so the writer is finalized when the block exits.
    with index_.writer() as writer:
        for document in tqdm(documents, desc="Index documents in Whoosh backend"):
            writer.add_document(**document)
    return index_

api/index_fts/schemas.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@
55

66
from whoosh.fields import (SchemaClass,
77
ID,
8-
TEXT)
9-
8+
TEXT,
9+
NGRAM,
10+
NGRAMWORDS)
1011

1112
class PersonIdxSchema(SchemaClass):
    """Schema for the PersonIdx index."""

    # Unique, stored identifier of the person.
    id_dil = ID(stored=True, unique=True)

    # Name fields are indexed as character n-grams (2 to 15 characters).
    lastname = NGRAM(minsize=2, maxsize=15, stored=True)
    firstnames = NGRAM(minsize=2, maxsize=15, stored=True)

    # Combined name field, n-grams of 2 to 30 characters.
    firstnames_lastname = NGRAM(minsize=2, maxsize=30, stored=True)

    # Full text, stored in the index.
    content = TEXT(stored=True)

    # Per-word n-grams (2 to 10 characters) taken at the start of words.
    content_ngram = NGRAMWORDS(
        minsize=2,
        maxsize=10,
        stored=True,
        field_boost=1.0,
        tokenizer=None,
        at='start',
        queryor=False,
        sortable=False,
    )

api/index_fts/search_utils.py

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,55 @@
1-
from whoosh.qparser import MultifieldParser
1+
from unidecode import unidecode
2+
from whoosh.qparser import MultifieldParser, FuzzyTermPlugin
3+
from whoosh.query import Or, Term
24
from whoosh import index
35
from api.index_fts.index_conf import st
46

5-
def search_whoosh(keyword, fields=["content"], limit=50):
6-
"""
7-
Recherche plein texte dans un ou plusieurs champs (Whoosh).
7+
from whoosh.qparser import QueryParser, OrGroup, AndGroup
8+
from whoosh.query import Or, Term, And
9+
from unidecode import unidecode
810

9-
:param keyword: le terme recherché
10-
:param fields: liste de champs sur lesquels faire la recherche (par défaut: ["content"])
11-
:param limit: nombre maximum de résultats
12-
:return: liste de dicts avec id_dil et highlights
13-
"""
11+
12+
13+
14+
def search_whoosh(query_firstnames_lastname: str = "",
                  query_content: str = "",
                  limit=10000000):
    """Search the index by name, by content, or by both.

    When both queries are given, a document must match both. Leading ``*``
    wildcards are removed from each query before parsing.

    :param query_firstnames_lastname: query for the ``firstnames_lastname``
        field; it is lowercased and transliterated with ``unidecode``.
    :param query_content: query for the ``content`` field.
    :param limit: maximum number of results requested from the searcher.
    :return: dict mapping each hit's ``id_dil`` to ``{"highlight": ...}``,
        where the value is the ``content`` highlight, or ``None`` when the
        hit has no ``content`` value. Empty dict when both queries are empty.
    """
    def strip_leading_jokers(text):
        """Drop surrounding whitespace and every leading ``*``."""
        return text.strip().lstrip("*").strip()

    name_query = ""
    if query_firstnames_lastname:
        name_query = strip_leading_jokers(unidecode(query_firstnames_lastname.lower()))
    content_query = strip_leading_jokers(query_content) if query_content else ""

    # Nothing to search for: return before touching the index at all.
    if not name_query and not content_query:
        return {}

    ix = st.open_index()
    hits = {}

    with ix.searcher() as searcher:
        if name_query and content_query:
            # Both queries given: a document must satisfy each of them.
            query = And([
                QueryParser('firstnames_lastname', schema=ix.schema).parse(name_query),
                QueryParser('content', schema=ix.schema).parse(content_query),
            ])
        elif name_query:
            parser = QueryParser('firstnames_lastname', schema=ix.schema, group=AndGroup)
            query = parser.parse(name_query)
        else:
            parser = MultifieldParser(['content'], schema=ix.schema, group=AndGroup)
            query = parser.parse(content_query)

        results = searcher.search(query, limit=limit)
        # Highlights are built while the searcher is still open.
        for hit in results:
            highlight = hit.highlights("content") if "content" in hit else None
            hits[hit["id_dil"]] = {"highlight": highlight}

    return hits

api/models/models.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import uuid
1010
import time
1111
from functools import wraps
12+
from unidecode import unidecode
1213

1314
from sqlalchemy import (Column,
1415
Integer,
@@ -222,12 +223,17 @@ def update_person_fts_index_after_update(cls, mapper, connection, target, ix):
222223
clean_text = prepare_content(target)
223224
lastname = target.lastname or ""
224225
firstnames = target.firstnames or ""
225-
writer.update_document(
226+
lastname = unidecode(lastname or "").lower().encode('utf-8').decode('utf-8')
227+
firstnames = unidecode(firstnames or "").lower().encode('utf-8').decode('utf-8')
228+
writer.add_document(
226229
id_dil=str(target._id_dil).encode('utf-8').decode('utf-8'),
227-
lastname=lastname.encode('utf-8').decode('utf-8'),
228-
firstnames=firstnames.encode('utf-8').decode('utf-8'),
229-
content=clean_text.encode('utf-8').decode('utf-8')
230+
lastname=lastname,
231+
firstnames=firstnames,
232+
content=clean_text.encode('utf-8').decode('utf-8'),
233+
content_ngram=unidecode(clean_text).lower().encode('utf-8').decode('utf-8'),
234+
firstnames_lastname=f"{' '.join(firstnames.split(','))} {lastname}"
230235
)
236+
231237
writer.commit()
232238

233239
@classmethod
@@ -239,11 +245,15 @@ def insert_person_fts_index_after_insert(cls, mapper, connection, target, ix):
239245
clean_text = prepare_content(target)
240246
lastname = target.lastname or ""
241247
firstnames = target.firstnames or ""
248+
lastname = unidecode(lastname or "").lower().encode('utf-8').decode('utf-8')
249+
firstnames = unidecode(firstnames or "").lower().encode('utf-8').decode('utf-8')
242250
writer.add_document(
243251
id_dil=str(target._id_dil).encode('utf-8').decode('utf-8'),
244-
lastname=lastname.encode('utf-8').decode('utf-8'),
245-
firstnames=firstnames.encode('utf-8').decode('utf-8'),
246-
content=clean_text.encode('utf-8').decode('utf-8')
252+
lastname=lastname,
253+
firstnames=firstnames,
254+
content=clean_text.encode('utf-8').decode('utf-8'),
255+
content_ngram=unidecode(clean_text).lower().encode('utf-8').decode('utf-8'),
256+
firstnames_lastname=f"{' '.join(firstnames.split(','))} {lastname}"
247257
)
248258

249259
writer.commit()

0 commit comments

Comments
 (0)