From dca256d98b0d530b6292837f30e4280367901d9c Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Fri, 23 Jan 2026 10:45:02 -0600 Subject: [PATCH 1/3] Support per_page parameter Why these changes are being introduced: As we begin to experiment with reranking results from multiple APIs, it will be useful to configure TIMDEX to return a flexible number of results. Relevant ticket(s): - [TIMX-593](https://mitlibraries.atlassian.net/browse/TIMX-593) How this addresses that need: This updates the OpenSearch model to calculate query size based on the presence of a per_page param. The model continues to fall back on the SIZE and MAX_PAGE constants if no param is provided. Similarly, the QueryType class is updated to provide a per_page query param. We here we set the default value to 20 as a secondary layer of defense against bad inputs. Side effects of this change: New VCR cassette to cover the GraphQL query behavior. Normally I prefer stubs/mocks, but since this is a single additional test in a suite that solely uses VCR, it felt appropriate to continue that practice. --- app/graphql/types/query_type.rb | 9 ++- app/models/opensearch.rb | 11 ++- test/controllers/graphql_controller_test.rb | 19 +++++ test/models/opensearch_test.rb | 28 ++++++++ .../graphql_search_per_page_5.yml | 70 +++++++++++++++++++ 5 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 test/vcr_cassettes/graphql_search_per_page_5.yml diff --git a/app/graphql/types/query_type.rb b/app/graphql/types/query_type.rb index 3950b8d..bbb9f51 100644 --- a/app/graphql/types/query_type.rb +++ b/app/graphql/types/query_type.rb @@ -54,6 +54,8 @@ def record_id(id:, index:) argument :title, String, required: false, default_value: nil, description: 'Search by title' argument :from, String, required: false, default_value: '0', description: 'Search result number to begin with (the first result is 0)' + argument :per_page, Integer, required: false, default_value: 20, + description: 'Number of results per page (OpenSearch size). Defaults to 20.' argument :fulltext, Boolean, required: false, default_value: false, description: 'Include fulltext field in search? Defaults to false.' argument :index, String, required: false, default_value: nil, @@ -101,9 +103,9 @@ def record_id(id:, index:) end def search(searchterm:, citation:, contributors:, funding_information:, geodistance:, geobox:, identifiers:, - locations:, subjects:, title:, index:, source:, from:, boolean_type:, fulltext:, **filters) + locations:, subjects:, title:, index:, source:, from:, boolean_type:, fulltext:, per_page: 20, **filters) query = construct_query(searchterm, citation, contributors, funding_information, geodistance, geobox, identifiers, - locations, subjects, title, source, boolean_type, filters) + locations, subjects, title, source, boolean_type, filters, per_page) results = Opensearch.new.search(from, query, Timdex::OSClient, highlight_requested?, index, fulltext) @@ -133,7 +135,7 @@ def inject_hits_fields_into_source(hits) end def construct_query(searchterm, citation, contributors, funding_information, geodistance, geobox, identifiers, - locations, subjects, title, source, boolean_type, filters) + locations, subjects, title, source, boolean_type, filters, per_page) query = {} query[:q] = searchterm query[:boolean_type] = boolean_type @@ -144,6 +146,7 @@ def construct_query(searchterm, citation, contributors, funding_information, geo query[:geobox] = geobox query[:identifiers] = identifiers query[:locations] = locations + query[:per_page] = per_page query[:subjects] = subjects query[:title] = title query[:access_to_files_filter] = filters[:access_to_files_filter] diff --git a/app/models/opensearch.rb b/app/models/opensearch.rb index 6aeb27f..955d803 100644 --- a/app/models/opensearch.rb +++ b/app/models/opensearch.rb @@ -24,9 +24,18 @@ def default_index # Construct the json query to send to elasticsearch def build_query(from) + # allow overriding the OpenSearch `size` via params (per_page), capped by MAX_PAGE + calculate_size = if @params && @params[:per_page] + per_page = @params[:per_page].to_i + per_page = SIZE if per_page <= 0 + [per_page, MAX_PAGE].min + else + SIZE + end + query_hash = { from:, - size: SIZE, + size: calculate_size, query:, aggregations: Aggregations.all, sort: diff --git a/test/controllers/graphql_controller_test.rb b/test/controllers/graphql_controller_test.rb index ae4a24e..213ae8a 100644 --- a/test/controllers/graphql_controller_test.rb +++ b/test/controllers/graphql_controller_test.rb @@ -937,4 +937,23 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest end end end + + test 'graphql search respects perPage argument' do + VCR.use_cassette('opensearch_init') do + VCR.use_cassette('graphql_search_per_page_5', match_requests_on: [:method, :uri]) do + post '/graphql', params: { query: '{ + search(perPage:5) { + hits + records { + title + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + assert_equal(5, json['data']['search']['records'].count) + assert_equal(100, json['data']['search']['hits']) + end + end + end end diff --git a/test/models/opensearch_test.rb b/test/models/opensearch_test.rb index 7f4b89f..be7a302 100644 --- a/test/models/opensearch_test.rb +++ b/test/models/opensearch_test.rb @@ -359,4 +359,32 @@ class OpensearchTest < ActiveSupport::TestCase os.query.to_json.include?('{"locations.geoshape":{"top":"42.886","bottom":"41.239","left":"-69.507","right":"-73.928"}}') ) end + + test 'build_query uses default size' do + os = Opensearch.new + os.instance_variable_set(:@params, {}) + json = JSON.parse(os.build_query(0)) + assert_equal Opensearch::SIZE, json['size'] + end + + test 'build_query respects per_page' do + os = Opensearch.new + os.instance_variable_set(:@params, { per_page: 5 }) + json = JSON.parse(os.build_query(0)) + assert_equal 5, json['size'] + end + + test 'build_query falls back for nonpositive per_page' do + os = Opensearch.new + os.instance_variable_set(:@params, { per_page: 0 }) + json = JSON.parse(os.build_query(0)) + assert_equal Opensearch::SIZE, json['size'] + end + + test 'build_query caps per_page at MAX_PAGE' do + os = Opensearch.new + os.instance_variable_set(:@params, { per_page: Opensearch::MAX_PAGE + 100 }) + json = JSON.parse(os.build_query(0)) + assert_equal Opensearch::MAX_PAGE, json['size'] + end end diff --git a/test/vcr_cassettes/graphql_search_per_page_5.yml b/test/vcr_cassettes/graphql_search_per_page_5.yml new file mode 100644 index 0000000..c518ff5 --- /dev/null +++ b/test/vcr_cassettes/graphql_search_per_page_5.yml @@ -0,0 +1,70 @@ +--- +http_interactions: +- request: + method: post + uri: http://localhost:9200/all-current/_search + body: + encoding: UTF-8 + string: '' + headers: + User-Agent: + - 'opensearch-ruby/3.1.0 (RUBY_VERSION: 3.2.2; darwin x86_64; Faraday v2.9.0)' + Content-Type: + - application/json + Host: + - localhost:9200 + X-Amz-Date: + - "" + X-Amz-Content-Sha256: + - "" + Authorization: + - "" + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - "Thu, 01 Jan 1970 00:00:00 GMT" + Content-Type: + - application/json; charset=UTF-8 + Connection: + - keep-alive + Vary: + - Accept-Encoding, User-Agent + body: + encoding: ASCII-8BIT + string: | + { + "took": 1, + "timed_out": false, + "_shards": { "total": 1, "successful": 1, "failed": 0 }, + "hits": { + "total": { "value": 100, "relation": "eq" }, + "max_score": 1.0, + "hits": [ + { "_index": "all-current", "_id": "fake1", "_score": 1.0, "_source": { "title": "Fake Title 1" } }, + { "_index": "all-current", "_id": "fake2", "_score": 1.0, "_source": { "title": "Fake Title 2" } }, + { "_index": "all-current", "_id": "fake3", "_score": 1.0, "_source": { "title": "Fake Title 3" } }, + { "_index": "all-current", "_id": "fake4", "_score": 1.0, "_source": { "title": "Fake Title 4" } }, + { "_index": "all-current", "_id": "fake5", "_score": 1.0, "_source": { "title": "Fake Title 5" } } + ] + }, + "aggregations": { + "access_to_files": { "only_file_access": { "access_types": { "buckets": [] } } }, + "contributors": { "contributor_names": { "buckets": [] } }, + "source": { "buckets": [] }, + "subjects": { "subject_names": { "buckets": [] } }, + "places": { "only_spatial": { "place_names": { "buckets": [] } } }, + "languages": { "buckets": [] }, + "literary_form": { "buckets": [] }, + "content_format": { "buckets": [] }, + "content_type": { "buckets": [] } + } + } + recorded_at: Thu, 23 Jan 2026 00:00:00 GMT +recorded_with: VCR 6.2.0 From 64039d4a44d6b00f74f424f6139c56f644ac2156 Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:49:09 -0600 Subject: [PATCH 2/3] Address code review feedback --- app/graphql/types/query_type.rb | 2 +- app/models/opensearch.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/graphql/types/query_type.rb b/app/graphql/types/query_type.rb index bbb9f51..6ef9a07 100644 --- a/app/graphql/types/query_type.rb +++ b/app/graphql/types/query_type.rb @@ -55,7 +55,7 @@ def record_id(id:, index:) argument :from, String, required: false, default_value: '0', description: 'Search result number to begin with (the first result is 0)' argument :per_page, Integer, required: false, default_value: 20, - description: 'Number of results per page (OpenSearch size). Defaults to 20.' + description: 'Number of results per page. Defaults to 20.' argument :fulltext, Boolean, required: false, default_value: false, description: 'Include fulltext field in search? Defaults to false.' argument :index, String, required: false, default_value: nil, diff --git a/app/models/opensearch.rb b/app/models/opensearch.rb index 955d803..e5c2e5e 100644 --- a/app/models/opensearch.rb +++ b/app/models/opensearch.rb @@ -2,7 +2,7 @@ # rubocop:disable Metrics/MethodLength class Opensearch SIZE = 20 - MAX_PAGE = 200 + MAX_SIZE = 200 def search(from, params, client, highlight = false, index = nil, fulltext = false) @params = params @@ -28,7 +28,7 @@ def build_query(from) calculate_size = if @params && @params[:per_page] per_page = @params[:per_page].to_i per_page = SIZE if per_page <= 0 - [per_page, MAX_PAGE].min + [per_page, MAX_SIZE].min else SIZE end From 581cfdb5c4d56b0b8e328216038bceeefb9ec49a Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:50:53 -0600 Subject: [PATCH 3/3] Update test --- test/models/opensearch_test.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/models/opensearch_test.rb b/test/models/opensearch_test.rb index be7a302..491d0be 100644 --- a/test/models/opensearch_test.rb +++ b/test/models/opensearch_test.rb @@ -381,10 +381,10 @@ class OpensearchTest < ActiveSupport::TestCase assert_equal Opensearch::SIZE, json['size'] end - test 'build_query caps per_page at MAX_PAGE' do + test 'build_query caps per_page at MAX_SIZE' do os = Opensearch.new - os.instance_variable_set(:@params, { per_page: Opensearch::MAX_PAGE + 100 }) + os.instance_variable_set(:@params, { per_page: Opensearch::MAX_SIZE + 100 }) json = JSON.parse(os.build_query(0)) - assert_equal Opensearch::MAX_PAGE, json['size'] + assert_equal Opensearch::MAX_SIZE, json['size'] end end