1+ import pytest
2+ import sys , os
3+ sys .path .append (os .path .abspath (os .path .join (os .path .dirname (__file__ ), '..' )))
4+
5+ from phidown .search import CopernicusDataSearcher
6+ from unittest .mock import Mock , patch
7+ import pandas as pd
8+
# Location of the package's config.json, resolved relative to this test module.
CONFIG_PATH = os.path.join(
    os.path.dirname(__file__),
    '..',
    'phidown',
    'config.json',
)
11+
12+
def test_pagination_disabled_by_default():
    """Check that only one request is made when the query uses count=False.

    The mocked payload reports an ``@odata.count`` of 1500, far above
    ``top=10``, so any extra request would show that the reported total was
    acted on even though ``count=False`` was requested.
    """
    searcher = CopernicusDataSearcher()
    searcher.query_by_filter(
        collection_name='SENTINEL-1',
        product_type='SLC',
        top=10,
        count=False,  # pagination should not trigger
    )

    # One page of ten products. Ids and names are generated without any
    # padding so they read exactly 'product_<n>' / 'name_<n>'.
    first_page = [{'Id': f'product_{i}', 'Name': f'name_{i}'} for i in range(10)]

    mock_response = Mock()
    mock_response.json.return_value = {
        'value': first_page,
        '@odata.count': 1500,  # more than top=10, but count=False
    }
    mock_response.raise_for_status = Mock()

    with patch('requests.get', return_value=mock_response) as mock_get:
        df = searcher.execute_query()

    # A single request, and only the first page of rows.
    assert mock_get.call_count == 1
    assert len(df) == 10
37+
38+
def test_pagination_when_count_enabled_and_results_exceed_top():
    """Check that follow-up pages are requested when count=True and total > top.

    With ``top=5`` and a reported ``@odata.count`` of 12, the test expects
    three requests (5 + 5 + 2 rows) and ``$skip=5`` / ``$skip=10`` in the URLs
    of the second and third requests.
    """
    searcher = CopernicusDataSearcher()
    searcher.query_by_filter(
        collection_name='SENTINEL-1',
        product_type='SLC',
        top=5,
        count=True,
    )

    def make_page(indices):
        """Build a mocked successful response holding one page of products."""
        page = Mock()
        page.json.return_value = {
            # Ids/names are generated without padding: 'product_<n>'.
            'value': [{'Id': f'product_{i}', 'Name': f'name_{i}'} for i in indices],
        }
        page.raise_for_status = Mock()
        return page

    pages = [make_page(range(5)), make_page(range(5, 10)), make_page(range(10, 12))]
    # Only the first response carries the reported total.
    pages[0].json.return_value['@odata.count'] = 12

    with patch('requests.get', side_effect=pages) as mock_get:
        df = searcher.execute_query()

    # Three requests in total, covering all twelve rows.
    assert mock_get.call_count == 3
    assert len(df) == 12

    # Assumes the URL is passed to requests.get as the first positional argument.
    second_call, third_call = mock_get.call_args_list[1:3]
    assert '$skip=5' in second_call[0][0]
    assert '$skip=10' in third_call[0][0]
80+
81+
def test_no_pagination_when_results_within_top_limit():
    """Check that no extra request is made when count=True but total <= top.

    The mocked payload holds 50 rows and reports ``@odata.count`` of 50 for a
    query built with ``top=100``, so a single request is expected.
    """
    searcher = CopernicusDataSearcher()
    searcher.query_by_filter(
        collection_name='SENTINEL-1',
        product_type='SLC',
        top=100,
        count=True,
    )

    # Fifty products; ids/names are generated without padding: 'product_<n>'.
    products = [{'Id': f'product_{i}', 'Name': f'name_{i}'} for i in range(50)]

    mock_response = Mock()
    mock_response.json.return_value = {
        'value': products,
        '@odata.count': 50,  # less than top=100
    }
    mock_response.raise_for_status = Mock()

    with patch('requests.get', return_value=mock_response) as mock_get:
        df = searcher.execute_query()

    # A single request returning every row.
    assert mock_get.call_count == 1
    assert len(df) == 50
106+
107+
def test_pagination_with_1000_page_size():
    """Check pagination when the query is built with top=1000.

    A reported ``@odata.count`` of 2500 is expected to produce three requests
    (1000 + 1000 + 500 rows), with ``$skip=1000`` and ``$skip=2000`` in the
    URLs of the second and third requests.
    """
    searcher = CopernicusDataSearcher()
    searcher.query_by_filter(
        collection_name='SENTINEL-1',
        product_type='SLC',
        top=1000,  # default page size
        count=True,
    )

    def make_page(indices):
        """Build a mocked successful response holding one page of products."""
        page = Mock()
        page.json.return_value = {
            # Ids/names are generated without padding: 'product_<n>'.
            'value': [{'Id': f'product_{i}', 'Name': f'name_{i}'} for i in indices],
        }
        page.raise_for_status = Mock()
        return page

    pages = [
        make_page(range(1000)),
        make_page(range(1000, 2000)),
        make_page(range(2000, 2500)),
    ]
    # Only the first response carries the reported total.
    pages[0].json.return_value['@odata.count'] = 2500

    with patch('requests.get', side_effect=pages) as mock_get:
        df = searcher.execute_query()

    # Three requests in total, covering all 2500 rows.
    assert mock_get.call_count == 3
    assert len(df) == 2500

    # Assumes the URL is passed to requests.get as the first positional argument.
    second_call, third_call = mock_get.call_args_list[1:3]
    assert '$skip=1000' in second_call[0][0]
    assert '$skip=2000' in third_call[0][0]
149+
150+
def test_pagination_handles_request_errors_gracefully():
    """Check that a failure on a follow-up page still yields the first page.

    The first mocked response succeeds and reports 15 results for ``top=5``;
    the second raises from ``raise_for_status``. The test expects
    ``execute_query`` to return the five rows already fetched instead of
    propagating the exception.
    """
    searcher = CopernicusDataSearcher()
    searcher.query_by_filter(
        collection_name='SENTINEL-1',
        product_type='SLC',
        top=5,
        count=True,
    )

    # First page succeeds. Ids must be exactly 'product_<n>' (no padding),
    # otherwise the membership assertion at the end can never match.
    first_page = Mock()
    first_page.json.return_value = {
        'value': [{'Id': f'product_{i}', 'Name': f'name_{i}'} for i in range(5)],
        '@odata.count': 15,
    }
    first_page.raise_for_status = Mock()

    # Second page fails as soon as its status is checked.
    failing_page = Mock()
    failing_page.raise_for_status.side_effect = Exception("Network error")

    with patch('requests.get', side_effect=[first_page, failing_page]):
        # Should not raise, but return the partial results.
        df = searcher.execute_query()

    # At least the first page must be present.
    assert len(df) == 5
    assert 'product_0' in df['Id'].values
0 commit comments