From ef923c278182eb4eb2379daf7c42345017d87e95 Mon Sep 17 00:00:00 2001 From: Edwin Henneken Date: Tue, 20 May 2025 12:19:01 -0400 Subject: [PATCH] solution for Github issue 71 --- referencesrv/parser/crf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/referencesrv/parser/crf.py b/referencesrv/parser/crf.py index 553b06e..51fbb21 100644 --- a/referencesrv/parser/crf.py +++ b/referencesrv/parser/crf.py @@ -39,7 +39,7 @@ class CRFClassifierText(object): re.compile(r'([A-Z])(\-)([A-Z])\b')] URL_EXTRACTOR = re.compile(r'((url\s*)?(http)s?://[A-z0-9\-\.\/\={}?&%]+)', re.IGNORECASE) - MONTH_NAME_EXTRACTOR = re.compile(r'\b([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|([Nn]ov|[Dd]ec)(?:ember)?)\b') + MONTH_NAME_EXTRACTOR = re.compile(r'\b([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|([Nn]ov|[Dd]ec)(?:ember)?)\b(?!-)') URL_TO_DOI = re.compile(r'((url\s*)?(https\s*:\s*//\s*|http\s*:\s*//\s*)((.*?)doi(.*?)org/))|(DOI:https\s*://\s*)', flags=re.IGNORECASE) URL_TO_ARXIV = re.compile(r'((url\s*)?(https://|http://)(arxiv.org/(abs|pdf)/))', flags=re.IGNORECASE)