From dd73754023a6c511ee5cf368190f87b9b0d38445 Mon Sep 17 00:00:00 2001 From: xiangking Date: Tue, 8 Mar 2022 11:29:17 +0800 Subject: [PATCH] =?UTF-8?q?fix(bm25):=20=E4=BF=AE=E5=A4=8Dtokenizer?= =?UTF-8?q?=E5=B1=9E=E6=80=A7bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ark_search/recall/bm25.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ark_search/recall/bm25.py b/ark_search/recall/bm25.py index 6d03862..d183287 100644 --- a/ark_search/recall/bm25.py +++ b/ark_search/recall/bm25.py @@ -51,6 +51,8 @@ def __init__( self.idf = {} self.doc_len = [] + self.tokenizer = tokenizer + if is_retain_docs: self.docs = copy.deepcopy(corpus)