diff --git a/ark_search/recall/bm25.py b/ark_search/recall/bm25.py index 6d03862..d183287 100644 --- a/ark_search/recall/bm25.py +++ b/ark_search/recall/bm25.py @@ -51,6 +51,8 @@ def __init__( self.idf = {} self.doc_len = [] + self.tokenizer = tokenizer + if is_retain_docs: self.docs = copy.deepcopy(corpus)