Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions algorithms/bench/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ INCLUDE = -Icommon
%.o : %.C $(COMMON)
$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

timeDistance : timeDistance.C $(COMMON)
$(CC) $(CFLAGS) $(INCLUDE) -o timeDistance timeDistance.C

# $(BNCHMRK)Check : $(CHECKFILES)
# $(CC) $(LFLAGS) -o $@ $(CHECKFILES)

Expand Down
316 changes: 135 additions & 181 deletions algorithms/utils/beamSearch.h

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions algorithms/utils/check_nn_recall.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ void search_and_parse(Graph_ G_,
indexType start_point = 0,
bool verbose = false,
long fixed_beam_width = 0,
double rerank_factor = 100,
double batch_factor = .125) {
int rerank_factor = 100) {
parlay::sequence<nn_result> results;
std::vector<long> beams;
std::vector<long> allr;
Expand All @@ -211,7 +210,10 @@ void search_and_parse(Graph_ G_,
random,
start_point, k, QP, verbose);};

QueryParams QP(k, 0, 1.0, G.size(), G.max_degree(), rerank_factor, batch_factor);
QueryParams QP;
QP.limit = (long) G.size();
QP.rerank_factor = rerank_factor;
QP.degree_limit = (long) G.max_degree();
beams = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, 32,
34, 36, 38, 40, 45, 50, 55, 60, 65, 70, 80, 90, 100, 120, 140, 160,
180, 200, 225, 250, 275, 300, 375, 500, 750, 1000};
Expand Down Expand Up @@ -245,8 +247,7 @@ void search_and_parse(Graph_ G_,
//calculate_limits(results[0].avg_visited);
//parlay::sequence<long> degree_limits = calculate_limits(G.max_degree());
//degree_limits.push_back(G.max_degree());
QP = QueryParams(r, r, 1.35, (long) G.size(), (long) G.max_degree(),
rerank_factor, batch_factor);
QP = QueryParams(r, r, 1.35, (long) G.size(), (long) G.max_degree());
for(long l : limits){
QP.limit = l;
QP.beamSize = std::max<long>(l, r);
Expand All @@ -255,8 +256,7 @@ void search_and_parse(Graph_ G_,
results.push_back(check(r, QP));
}
// check "best accuracy"
QP = QueryParams((long) 100, (long) 1000, (double) 10.0, (long) G.size(),
(long) G.max_degree(), rerank_factor, batch_factor);
QP = QueryParams((long) 100, (long) 1000, (double) 10.0, (long) G.size(), (long) G.max_degree());
results.push_back(check(r, QP));

parlay::sequence<float> buckets = {.1, .2, .3, .4, .5, .6, .7, .75, .8, .85,
Expand Down
152 changes: 38 additions & 114 deletions algorithms/utils/check_range_recall.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#ifndef ALGORITHMS_CHECK_RANGE_RECALL
#define ALGORITHMS_CHECK_RANGE_RECALL

#include <algorithm>
#include <set>

#include "beamSearch.h"
#include "doublingSearch.h"
#include "rangeSearch.h"
#include "csvfile.h"
#include "parse_results.h"
#include "parlay/parallel.h"
Expand All @@ -13,148 +14,71 @@

namespace parlayANN {

template<typename Point, typename PointRange, typename QPointRange, typename indexType>
template<typename Point, typename PointRange, typename indexType>
void checkRangeRecall(
Graph<indexType> &G,
PointRange &Base_Points, PointRange &Query_Points,
QPointRange &Q_Base_Points, QPointRange &Q_Query_Points,
RangeGroundTruth<indexType> GT, QueryParams QP,
long start_point,parlay::sequence<indexType> &active_indices) {
PointRange &Base_Points,
PointRange &Query_Points,
RangeGroundTruth<indexType> GT,
RangeParams RP,
long start_point) {

if(QP.range_query_type == Doubling) {

parlay::internal::timer t;
float query_time;
stats<indexType> QueryStats(Query_Points.size());
parlay::sequence<indexType> start_points = {static_cast<indexType>(start_point)};

auto [all_rr,timings] = DoubleBeamRangeSearch(G,
Query_Points, Base_Points,
Q_Query_Points, Q_Base_Points,
QueryStats, start_points, QP, active_indices);
query_time = t.next_time();
auto [beam_search_time, other_time] = timings;

float pointwise_recall = 0.0;
float reported_results = 0.0;
float total_results = 0.0;
float num_nonzero = 0.0;

//since distances are exact, just have to cross-check number of results
size_t n = Query_Points.size();
for (indexType i = 0; i < n; i++) {
float num_reported_results = all_rr[i].size();
float num_actual_results = GT[i].size();
reported_results += num_reported_results;
total_results += num_actual_results;
if(num_actual_results != 0) {pointwise_recall += num_reported_results/num_actual_results; num_nonzero++;}
}

pointwise_recall /= num_nonzero;
float cumulative_recall = reported_results/total_results;

float QPS = Query_Points.size() / query_time;
auto stats_ = {QueryStats.dist_stats(), QueryStats.visited_stats()};
std::cout << "For ";
QP.print();
std::cout << ", Point Recall=" << pointwise_recall
<< ", Cum Recall=" << cumulative_recall
<< ", Comparisons=" << QueryStats.dist_stats()[0]
<< ", Visited=" << QueryStats.visited_stats()[0]
<< ", QPS=" << QPS
<< ", ctime=" << (1e9 / (QPS * QueryStats.dist_stats()[0]))
<< ", timings= [" << beam_search_time<< ","<< other_time <<"]"
<< std::endl;

} else if (QP.range_query_type == Greedy || QP.range_query_type == Beam) {
parlay::sequence<parlay::sequence<indexType>> all_rr;

parlay::internal::timer t;
float query_time;
stats<indexType> QueryStats(Query_Points.size());
parlay::sequence<indexType> start_points = {static_cast<indexType>(start_point)};
parlay::internal::timer t;

auto [all_rr, timings] = RangeSearch<Point,PointRange,QPointRange,indexType>(G,
Query_Points, Base_Points,
Q_Query_Points, Q_Base_Points,
QueryStats, start_point, QP);
auto [beam_search_time, other_time] = timings;

all_rr = RangeSearch<Point, PointRange, indexType>(Query_Points, G, Base_Points, QueryStats, start_point, RP);
query_time = t.next_time();


float pointwise_recall = 0.0;
float reported_results = 0.0;
float total_results = 0.0;
float num_nonzero = 0.0;

//since distances are exact, just have to cross-check number of results
size_t n = Query_Points.size();
for (indexType i = 0; i < n; i++) {
float num_reported_results = all_rr[i].size();
float num_actual_results = GT[i].size();
reported_results += num_reported_results;
total_results += num_actual_results;
if(num_actual_results != 0) {pointwise_recall += num_reported_results/num_actual_results; num_nonzero++;}
}
//since distances are exact, just have to cross-check number of results
size_t n = Query_Points.size();
for (indexType i = 0; i < n; i++) {
float num_reported_results = all_rr[i].size();
float num_actual_results = GT[i].size();
reported_results += num_reported_results;
total_results += num_actual_results;
if(num_actual_results != 0) {pointwise_recall += num_reported_results/num_actual_results; num_nonzero++;}
}

pointwise_recall /= num_nonzero;
float cumulative_recall = reported_results/total_results;
pointwise_recall /= num_nonzero;
float cumulative_recall = reported_results/total_results;

float QPS = Query_Points.size() / query_time;
auto stats_ = {QueryStats.dist_stats(), QueryStats.visited_stats()};

std::cout << "For ";
QP.print();
std::cout << ", Point Recall=" << pointwise_recall
<< ", Cum Recall=" << cumulative_recall
<< ", Comparisons=" << QueryStats.dist_stats()[0]
<< ", Visited=" << QueryStats.visited_stats()[0]
<< ", QPS=" << QPS
<< ", ctime=" << (1e9 / (QPS * QueryStats.dist_stats()[0]))
<< ", timings= [" << beam_search_time<< ","<< other_time <<"]"
<< std::endl;
}
else {
std::cout << "Error: No beam search type provided, -seach_mode should be one of [doubling, greedy, beam]" << std::endl;
}
RP.print();
std::cout << ", Pointwise Recall = " << pointwise_recall << ", Cumulative Recall = " << cumulative_recall << ", QPS = " << QPS << std::endl;


}


template<typename Point, typename PointRange, typename QPointRange, typename indexType>
void range_search_wrapper(Graph<indexType> &G,
PointRange &Base_Points, PointRange &Query_Points,
QPointRange &Q_Base_Points, QPointRange &Q_Query_Points,
RangeGroundTruth<indexType> GT, indexType start_point=0,
bool is_early_stopping = false, double esr = 0.0,
rangeQueryType rtype = None, double rad = 0.0) {
template<typename Point, typename PointRange, typename indexType>
void range_search_wrapper(Graph<indexType> &G, PointRange &Base_Points,
PointRange &Query_Points,
RangeGroundTruth<indexType> GT, double rad,
indexType start_point=0){

std::vector<long> beams;

beams = {10, 20, 30, 40, 50, 100, 1000, 2000, 3000};

long es = 0;

parlay::sequence<indexType> all = parlay::tabulate(Query_Points.size(), [&] (indexType i){return i;});
parlay::sequence<double> cumulative_recall;
parlay::sequence<std::pair<double,double>> timings;
parlay::sequence<long> beam_size;



for(long b: beams){
if (is_early_stopping)
es = std::max((long)10, b/4);

QueryParams QP(b, b, 0.0, G.size(), G.max_degree(),
is_early_stopping, esr, es, rtype, rad);


checkRangeRecall<Point>(G,
Base_Points, Query_Points,
Q_Base_Points, Q_Query_Points,
GT, QP, start_point, all);

RangeParams RP(rad, b);
checkRangeRecall<Point, PointRange, indexType>(G, Base_Points, Query_Points, GT, RP, start_point);
}



}

} // end namespace
#endif // ALGORITHMS_CHECK_RANGE_RECALL
128 changes: 0 additions & 128 deletions algorithms/utils/doublingSearch.h

This file was deleted.

Loading