Skip to content

Commit 615f96a

Browse files
committed
Bug fix/Patch: firstnames with no spaces
1 parent 0db7f7c commit 615f96a

5 files changed

Lines changed: 47 additions & 6 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,4 +179,5 @@ cython_debug/
179179
#index_dil/
180180
*.seg
181181
*.toc
182-
*WRITELOCK
182+
*WRITELOCK
183+
index_dil/*

api/api_utils.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""api_utils.py
2+
3+
Utility functions for the API.
4+
"""
5+
import re
6+
7+
_COMMA_FIX = re.compile(r'\s*,+\s*')
8+
_MULTI_SPACE = re.compile(r'\s{2,}')
9+
10+
def normalize_firstnames(v: str | None) -> str | None:
11+
"""Normalize firstnames by removing extra spaces and fixing commas.
12+
13+
Examples:
14+
>>> normalize_firstnames(" Auguste,Titus ")
15+
'Auguste, Titus'
16+
>>> normalize_firstnames(" Auguste , Titus ")
17+
'Auguste, Titus'
18+
>>> normalize_firstnames(" Auguste , Titus, ")
19+
'Auguste, Titus'
20+
>>> normalize_firstnames(None)
21+
None
22+
23+
:param v: The input string to normalize.
24+
:type v: str | None
25+
:return: The normalized string or None if input is None.
26+
"""
27+
if not v:
28+
return v
29+
s1 = v.strip()
30+
s = _COMMA_FIX.sub(', ', s1) # "Auguste,Titus" -> "Auguste, Titus" ; "Auguste , Titus" -> "Auguste, Titus"
31+
s = re.sub(r',\s*$', '', s) # remove final comma
32+
s = _MULTI_SPACE.sub(' ', s) # remove multiple spaces
33+
#print(s1," > ", s, " : ", s == s1)
34+
return s

api/crud.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
Address,
2020
Image)
2121
from .models.constants import type_patent_relations
22+
from api.api_utils import normalize_firstnames
2223

2324
MARKUP_HTML_FIELDS = {'personal_information', 'professional_information'}
2425
inverted_type_relations = {v: k for k, v in type_patent_relations.items()}
@@ -67,7 +68,7 @@ def enhance_patent_response(db: Session,
6768
'patent_relations': [{
6869
'_id_dil': str(patent_relation.person_related._id_dil) if patent_relation.person_related._id_dil else None,
6970
'lastname': patent_relation.person_related.lastname,
70-
'firstnames': patent_relation.person_related.firstnames,
71+
'firstnames': normalize_firstnames(patent_relation.person_related.firstnames),
7172
'type': inverted_type_relations.get(patent_relation.type)
7273

7374
} for patent_relation in patent.patent_relations if len(patent.patent_relations) > 0]
@@ -88,7 +89,7 @@ def enhance_printer_response(db: Session,
8889
return {
8990
"_id_dil": str(printer._id_dil) if printer._id_dil else None,
9091
"lastname": printer.lastname,
91-
"firstnames": printer.firstnames,
92+
"firstnames": normalize_firstnames(printer.firstnames),
9293
"birth_date": printer.birth_date,
9394
"birth_city_label": printer.birth_city_label,
9495
"birth_city_id": str(get_city(db, {
@@ -135,7 +136,7 @@ def get_printers(db: Session, args: dict, enhance: bool = False):
135136
{
136137
"_id_dil": str(printer._id_dil) if printer._id_dil else None,
137138
"lastname": printer.lastname,
138-
"firstnames": printer.firstnames,
139+
"firstnames": normalize_firstnames(printer.firstnames),
139140
"total_patents": len(printer.patents),
140141
}
141142
for printer in printers

api/routes.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
City,
4949
Address)
5050
from api.index_fts.search_utils import search_whoosh
51+
from api.api_utils import normalize_firstnames
5152

5253
api_router = APIRouter()
5354

@@ -170,7 +171,7 @@ def get_cities_with_printers(
170171
if person_key not in city_map[city_id]["persons"]:
171172
city_map[city_id]["persons"][person_key] = {
172173
"id": person_key,
173-
"firstnames": row.firstnames,
174+
"firstnames": normalize_firstnames(row.firstnames),
174175
"lastname": row.lastname,
175176
"city_patent": row.patent_city_label
176177
}
@@ -311,6 +312,7 @@ def make_cache_key(name: str, content: str):
311312
raw = f"{name or ''}|{content or ''}"
312313
return hashlib.sha256(raw.encode()).hexdigest()
313314

315+
314316
@cached(cache=cache, key=lambda name, content: make_cache_key(name, content))
315317
def cached_search(name, content):
316318
return search_whoosh(query_firstnames_lastname=name, query_content=content)
@@ -407,12 +409,13 @@ def read_printers(
407409
transformed_items = []
408410
for p in paginated.items:
409411
highlight = whoosh_hits.get(str(p.id_dil), {}).get("highlight")
412+
firstnames = normalize_firstnames(p.firstnames)
410413

411414
transformed_items.append(
412415
PrinterMinimalResponseOut(
413416
_id_dil=str(p.id_dil),
414417
lastname=p.lastname,
415-
firstnames=p.firstnames,
418+
firstnames=firstnames,
416419
total_patents=p.total_patents,
417420
highlight=highlight
418421
)
@@ -427,6 +430,7 @@ def read_printers(
427430
)
428431

429432
except Exception as e:
433+
print(e)
430434
import traceback
431435
return JSONResponse(status_code=500, content={"message": f"Erreur serveur: {e}"})
432436

api/schemas.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import Union, List, Optional
99
from pydantic import BaseModel, Field
1010

11+
1112
class BaseMeta(BaseModel):
1213
"""An abstract base class for meta schemas."""
1314
id_dil: str = Field(..., alias="_id_dil")

0 commit comments

Comments
 (0)