Trinity/queryexec_ctx.h at master · phaistos-networks/Trinity · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#pragma once
#include "docwordspace.h"
#include "exec.h"
#include "matches.h"
#include "similarity.h"
#include "compilation_ctx.h"

#define _HAVE_CANDIDATE_DOCUMENTS_ALLOCATOR

#define TRINITY_LASTBANK_OPTIMIZATION 1
namespace Trinity {
        // 64bytes alignment seems to yield good results, but crashes if optimizer is enabled (i.e struct alignas(64) exec_node {})
        // (this is because we use simple_allocator::New<> which doesn't respect the specified alignment. Not sure
        // if we should implement support for alignment allocations in simple_allocator)
        struct queryexec_ctx;

        // This is more akin to a short-term memory, implemented as a stack-sort-of system.
        //
        // A candidate_document is a reference-counted record for a single document.
        // It owns two raw buffers, both released in the destructor:
        //   - curDocQueryTokensCaptured, released with std::free()
        //   - termHits, released with delete[]
        // Because of that ownership the type is non-copyable; a copy would release
        // the same buffers twice.
        struct candidate_document final {
                isrc_docid_t     id{0};
                uint16_t         rc{1}; // reference count; a fresh instance starts with one reference
                uint16_t         bindCnt{0};
                matched_document matchedDocument;
                bool             dwsInUse{false}; // we are only going to reset if we are going to use it
                // Initialized to nullptr so that the destructor's std::free() is always
                // well defined, regardless of what the out-of-line constructor does.
                isrc_docid_t *   curDocQueryTokensCaptured{nullptr};
                uint16_t         curDocSeq{UINT16_MAX};
                term_hits *      termHits{nullptr};

                // Defined out of line.
                candidate_document(queryexec_ctx *const rctx);

                // Owns raw buffers: copying would lead to a double free.
                candidate_document(const candidate_document &) = delete;
                candidate_document &operator=(const candidate_document &) = delete;

                ~candidate_document() {
                        std::free(curDocQueryTokensCaptured);
                        delete[] termHits;
                }

                // Defined out of line.
                term_hits *materialize_term_hits(queryexec_ctx *, Codecs::PostingsListIterator *, const exec_term_id_t termID);

                // Adds one reference.
                inline void retain() {
                        ++rc;
                }

                // Adds one reference and returns this, so the call can be used inline
                // wherever a retained pointer is expected.
                inline auto retained() {
                        ++rc;
                        return this;
                }
        };

        // We can use banks to keep track of all tracked documents,
        // where base is e.g (id & ~(SIZE - 1)), i.e the id rounded down to a multiple of SIZE,
        // and we can then just dereference entries[id - base] directly.
        //
        // A bank owns two malloc()ed buffers (entries and, when BANKS_USE_BM is defined, bm)
        // which are released with std::free() in the destructor, so it is non-copyable.
        struct docstracker_bank {
                static constexpr std::size_t SIZE{8192};
                static constexpr std::size_t BM_SIZE{(SIZE + 63) / 64}; // number of 64-bit words needed for SIZE bits

// Use of bitmaps can result in an almost 100% speedup
#define BANKS_USE_BM 1
                // The (id & ~(SIZE - 1)) rounding only works when SIZE is a power of two
                // (checking for an even value is not enough), and setCnt is a uint16_t.
                static_assert(SIZE != 0 && (SIZE & (SIZE - 1)) == 0 && SIZE < std::numeric_limits<uint16_t>::max());

                // Initialized so that a freshly constructed bank never exposes an indeterminate base.
                isrc_docid_t base{0};

                struct entry final {
                        candidate_document *document;
                };

#ifdef BANKS_USE_BM
                uint64_t *const bm;
#endif
                entry *const entries;
                uint16_t     setCnt{0};

                // Allocates storage for SIZE entries (and BM_SIZE bitmap words).
                // The storage is NOT cleared here and allocation failure is not checked.
                // NOTE(review): presumably the code that hands out banks (new_bank()) clears
                // the buffers before use -- confirm.
                // Initializers are listed in declaration order, which is the order they run in.
                docstracker_bank()
                    :
#ifdef BANKS_USE_BM
                      bm(static_cast<uint64_t *>(std::malloc(sizeof(uint64_t) * BM_SIZE))),
#endif
                      entries(static_cast<entry *>(std::malloc(sizeof(entry) * SIZE))) {
                }

                // special constructor for dummy_bank:
                // owns no storage, and base is set to the largest isrc_docid_t value, which
                // can never equal a rounded-down base (those have their low bits cleared).
                docstracker_bank(const bool)
                    : base{std::numeric_limits<isrc_docid_t>::max()}
#ifdef BANKS_USE_BM
                    , bm{nullptr}
#endif
                    , entries{nullptr} {
                }

                // Owns raw buffers: copying would lead to a double free.
                docstracker_bank(const docstracker_bank &) = delete;
                docstracker_bank &operator=(const docstracker_bank &) = delete;

                ~docstracker_bank() noexcept {
                        // std::free(nullptr) is a no-op, so this is also fine for the dummy bank
                        std::free(entries);
#ifdef BANKS_USE_BM
                        std::free(bm);
#endif
                }

#ifdef TRINITY_LASTBANK_OPTIMIZATION
                // Shared placeholder bank; declared here, defined out of line.
                static docstracker_bank dummy_bank;
#endif
        };

        struct iterators_collector final {
                Codecs::PostingsListIterator **data{nullptr};
                uint16_t                       cnt{0};

                void init(const uint16_t n) {
                        data = (Codecs::PostingsListIterator **)malloc(sizeof(Codecs::PostingsListIterator *) * n);
                }

                ~iterators_collector() noexcept {
                        if (data) {
                                std::free(data);
			}
                }
        };

        // This is initialized by the compiler
        // and used by the VM
        //
        // The context tracks raw pointers to the iterators, decoders and candidate documents
        // created during execution and releases them in its out-of-line destructor, so it
        // is non-copyable.
        struct queryexec_ctx final {
                const bool                          documentsOnly, accumScoreMode;
                IndexSource *const                  idxsrc;
                iterators_collector                 collectedIts;
                Similarity::IndexSourceTermsScorer *scorer{nullptr};

                queryexec_ctx(IndexSource *src, const bool documentsOnly_, const bool accumScoreMode_)
                    : documentsOnly{documentsOnly_}, accumScoreMode{accumScoreMode_}, idxsrc{src} {
                }

                // The destructor releases resources reachable through raw pointers;
                // a copy would release them a second time.
                queryexec_ctx(const queryexec_ctx &) = delete;
                queryexec_ctx &operator=(const queryexec_ctx &) = delete;

                ~queryexec_ctx();

                void capture_matched_term(Codecs::PostingsListIterator *);

                // For simplicity's sake, we are just going to map exec_term_id_t => decoders[] without
                // indirection. For each distinct/resolved term, we have a decoder and
                // term_hits in decode_ctx.decoders[] and decode_ctx.termHits[]
                // This means you can index them using a termID
                // This means we may have some nullptr in decode_ctx.decoders[] but that's OK
                // NOTE(review): decode_ctx_struct as declared below only has decoders[];
                // term hits appear to live in candidate_document::termHits -- confirm.
                void prepare_decoder(exec_term_id_t termID);

                // Returns (a copy of) the term_index_ctx registered for termID.
                // Note that std::unordered_map::operator[] default-inserts an entry when
                // termID is not present in tctxMap.
                inline term_index_ctx term_ctx(const exec_term_id_t termID) {
                        return tctxMap[termID].first;
                }

                // Resolves a term to a termID relative to the queryexec_ctx
                // This id is meaningless outside this execution context
                // and we use it because it's easier to track/use integers than strings
                // See Termspaces in CONCEPTS.md
                exec_term_id_t resolve_term(const str8_t term);

                DocsSetIterators::Iterator *build_iterator(const exec_node n, const uint32_t execFlags);

                // Instead of having a virtual DocsSetIterators::Iterator::~Iterator()
                // which means we would need another entry in the vtable, which means a higher chance for cache misses, for no really good reason
                // we just track all created DocsSetIterators::Iterators along with their type, and in ~queryexec_ctx() we consider the type, cast and delete it
                DocsSetIterators::Iterator *reg_docset_it(DocsSetIterators::Iterator *it);

                // This is specific to PostingsListIterator`s
                Codecs::PostingsListIterator *reg_pli(Codecs::PostingsListIterator *it);

                // indexed by termID
                // (initialized to nullptr so that the pointer is never indeterminate)
                query_term_ctx **originalQueryTermCtx{nullptr};

                // Array of decoders, indexed by termID (see prepare_decoder()).
                struct decode_ctx_struct final {
                        Trinity::Codecs::Decoder **decoders{nullptr};
                        uint16_t                   capacity{0};

                        void check(const uint16_t idx);

                        ~decode_ctx_struct();
                } decode_ctx;

                struct
                {
                        candidate_document **data{nullptr};
                        // FIXED: turns out, we can exceed std::numeric_limits<uint16_t>::max() for some queries
                        // e.g for bestprice:
                        // [ ' apple OR "iphone x" OR "apple iphone x" OR ipod OR "apple ipad" OR "world of warcraft"  OR "world of" OR blizzard OR games OR "apple iphone x" OR "iphone X"  OR "Samsung galaxy" OR "32 GB" OR HTC OR "galaxy s8"  OR "phaistos networks" OR "las vegas" OR cid:806' ]
                        // this query could be used to reproduce the problem
                        uint32_t size{0};
                        uint32_t capacity{0};
                } tracked_docrefs;

                // Stack of candidate documents that are available for reuse
                // (cds_release() pushes to it).
                struct _reusable_cds final {
                        candidate_document **data{nullptr};
                        uint32_t             size_{0}, capacity{0};

                        void push_back(candidate_document *d);

                        inline auto size() const noexcept {
                                return size_;
                        }

                        // Pops the most recently pushed document, or returns nullptr if empty.
                        inline candidate_document *pop_one() noexcept {
                                return size_ ? data[--size_] : nullptr;
                        }

                } reusableCDS;
#ifdef _HAVE_CANDIDATE_DOCUMENTS_ALLOCATOR
                simple_allocator candidate_documents_allocator{32 * 1024};
#endif
                DocsSetIterators::Iterator *rootIterator{nullptr};

                candidate_document *cds_track(const isrc_docid_t did);

                // Drops one reference to d. When the reference count was 1 before the
                // decrement, the document is reset (term hits docid set to 0, rc back
                // to 1, id cleared) and pushed to reusableCDS so that it can be reused.
                inline void cds_release(candidate_document *const d) {
                        if (1 == d->rc--) {
                                if (auto th = d->termHits) {
                                        th->set_docid(0);
                                }

                                d->rc = 1;
                                d->id = 0;
                                reusableCDS.push_back(d);
                        }
                }

                // To support phrases semantics, where we need access to a document's materialized hits for 2+ terms
                // we will need to materialise those documents in DocsSetIterators::Phrase::consider_phrase_match()
                // so that we will both be able to use the materialised (document, term) hits later if required(i.e won't need
                // to materialise again) and be able to efficiently stop tracking those documents if they are no longer needed
                void gc_retained_docs(const isrc_docid_t);

                void track_docref(candidate_document *);

// use of banks provides a noticeable speedup
// 393ms down to 340ms. Not a huge difference, but it's welcome(~13%)
#define USE_BANKS 1

#ifdef USE_BANKS
                inline candidate_document *lookup_document(const isrc_docid_t id) {
                        return lookup_document_inbank(id);
                }

                inline void forget_document(candidate_document *doc) {
                        forget_document_inbank(doc);
                }
#else
                candidate_document *lookup_document(const isrc_docid_t);

                void forget_document(candidate_document *);
#endif

                candidate_document *document_by_id(const isrc_docid_t id);

                // term => termID (see resolve_term())
                std::unordered_map<str8_t, exec_term_id_t> termsDict;


                // TODO: determine suitable allocator bank size based on some meaningful metric
                // e.g total distinct tokens in the query, otherwise we may just end up allocating more memory than
                // we need and for environments where memory pressure is a concern, this may be important.
                // For now, go with large enough bank sizes for the allocators and figure out something later.
                // We should also track allocated (from allocators) memory that is no longer needed so that we can reuse it
                // Maybe we just need a method for allocating arbitrary amount of memory and releasing it back to the runtime ctx
                simple_allocator                                                      allocator{4096 * 6};
                std::vector<void *>                                                   large_allocs;
                std::unordered_map<exec_term_id_t, std::pair<term_index_ctx, str8_t>> tctxMap;
                std::vector<DocsSetIterators::Iterator *>                             docsetsIterators;
                std::vector<Codecs::PostingsListIterator *>                           allIterators;
                // to do away with (lastBank != nullptr) tests
                // we can instead assign lastBank to (&docstracker_bank::dummy_bank)
                // and because its base is set to an 'impossible' value, it will work great
#ifndef TRINITY_LASTBANK_OPTIMIZATION
                docstracker_bank *                                                    lastBank{nullptr};
#else
                docstracker_bank *lastBank{&docstracker_bank::dummy_bank};
#endif

                // NOTE(review): anonymous structs are a compiler extension (GCC, Clang, MSVC),
                // not ISO C++; the members are accessed directly as members of queryexec_ctx.
                struct
                {
#ifndef USE_BANKS
                        std::vector<candidate_document *> trackedDocuments[16];
#else
                        std::vector<docstracker_bank *> banks, reusableBanks;
#endif
                        isrc_docid_t maxTrackedDocumentID{0}, lastMatchedDocumentID{0};
                };

#ifdef USE_BANKS
                // Returns the bank responsible for id: lastBank if it covers id, otherwise
                // the matching bank in banks[] (which then becomes lastBank), otherwise
                // whatever new_bank() returns for the rounded-down base.
                inline docstracker_bank *bank_for(const Trinity::isrc_docid_t id) {
                        const auto base = id & (~(docstracker_bank::SIZE - 1)); // rounded down

#ifndef TRINITY_LASTBANK_OPTIMIZATION
                        if (lastBank && lastBank->base == base)
#else
                        if (lastBank->base == base)
#endif
                        {
                                return lastBank;
                        }
                        else {
                                // consider using counting linear search
                                // may make more sense because it's going to be branchless
                                //
                                // TODO: maybe we can also track the max(bank->base) among
                                // all in banks[], so that we can quickly determine if
                                // id is not in any of the banks[](i.e if (base > max_banks_base) )
                                for (auto b : banks) {
                                        if (b->base == base) {
                                                lastBank = b;
                                                return b;
                                        }
                                }

                                return new_bank(base);
                        }
                }

                docstracker_bank *new_bank(const isrc_docid_t);

                void forget_document_inbank(candidate_document *);

                // track_document() is responsible for tracking the document lookup_document_inbank() can later query for
                candidate_document *lookup_document_inbank(const isrc_docid_t);

                void track_document_inbank(candidate_document *);

                // Tracks doc in its bank and keeps maxTrackedDocumentID up to date.
                inline void track_document(candidate_document *doc) {
                        track_document_inbank(doc);
                        maxTrackedDocumentID = std::max(maxTrackedDocumentID, doc->id);
                }
#else
                // Tracks doc in one of the trackedDocuments[] buckets (selected by the low
                // bits of its id) and keeps maxTrackedDocumentID up to date.
                inline void track_document(candidate_document *const doc) {
                        auto &v = trackedDocuments[doc->id & (sizeof_array(trackedDocuments) - 1)];

                        v.emplace_back(doc);
                        maxTrackedDocumentID = std::max(maxTrackedDocumentID, doc->id);
                }
#endif

                void prepare_match(candidate_document *);
        };
} // namespace Trinity