@@ -946,3 +946,102 @@ async fn test_downloader_downloads_sound_files() {
946946 "multimedia.csv should reference local sound file"
947947 ) ;
948948}
949+
950+ #[ tokio:: test]
951+ #[ serial]
952+ async fn test_downloader_no_duplicate_zip_entry_when_photo_appears_in_two_batches ( ) {
953+ // Regression: when the same photo_id appears in two different API pages (e.g. the same
954+ // photo is attached to two different observations), the downloader must not write it to
955+ // the ZIP twice. A duplicate ZIP entry causes `ZipArchive::new` to fail with
956+ // "Invalid Zip archive: Duplicate filename".
957+ let server = MockServer :: start ( ) ;
958+
959+ let config = chuck_core:: api:: client:: create_config_with_base_url_and_jwt (
960+ server. base_url ( ) ,
961+ Some ( "test_jwt" . to_string ( ) ) ,
962+ ) ;
963+
964+ // Page 3: empty, stops pagination
965+ let _observations_page3_mock = server. mock ( |when, then| {
966+ when. method ( GET )
967+ . path ( "/observations" )
968+ . query_param ( "id_below" , "200" ) ;
969+ then. status ( 200 )
970+ . header ( "content-type" , "application/json" )
971+ . json_body ( observations_response_json ( 2 , vec ! [ ] ) ) ;
972+ } ) ;
973+
974+ // Page 2: observation 200 — same photo_id 9999 as page 1
975+ let _observations_page2_mock = server. mock ( |when, then| {
976+ when. method ( GET )
977+ . path ( "/observations" )
978+ . query_param ( "id_below" , "300" ) ;
979+ then. status ( 200 )
980+ . header ( "content-type" , "application/json" )
981+ . json_body ( observations_response_json (
982+ 2 ,
983+ vec ! [ observation_json( 200 , & server. base_url( ) , & [ 9999 ] ) ] ,
984+ ) ) ;
985+ } ) ;
986+
987+ // Page 1: observation 300 with photo_id 9999
988+ let _observations_page1_mock = server. mock ( |when, then| {
989+ when. method ( GET ) . path ( "/observations" ) ;
990+ then. status ( 200 )
991+ . header ( "content-type" , "application/json" )
992+ . json_body ( observations_response_json (
993+ 2 ,
994+ vec ! [ observation_json( 300 , & server. base_url( ) , & [ 9999 ] ) ] ,
995+ ) ) ;
996+ } ) ;
997+
998+ let _taxa_mock = server. mock ( |when, then| {
999+ when. method ( GET ) . path_contains ( "/taxa" ) ;
1000+ then. status ( 200 )
1001+ . header ( "content-type" , "application/json" )
1002+ . json_body ( taxa_response_json ( ) ) ;
1003+ } ) ;
1004+
1005+ // Photo 9999 will be requested once per batch — twice total
1006+ let _photo_mock = server. mock ( |when, then| {
1007+ when. method ( GET ) . path ( "/photo9999.jpg" ) ;
1008+ then. status ( 200 )
1009+ . header ( "content-type" , "image/jpeg" )
1010+ . body ( MINIMAL_PNG ) ;
1011+ } ) ;
1012+
1013+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
1014+ let output_path = temp_dir. path ( ) . join ( "test.zip" ) ;
1015+
1016+ let params = observations_api:: ObservationsGetParams {
1017+ taxon_id : Some ( vec ! [ "47126" . to_string( ) ] ) ,
1018+ per_page : Some ( "1" . to_string ( ) ) ,
1019+ ..chuck_core:: api:: params:: DEFAULT_GET_PARAMS . clone ( )
1020+ } ;
1021+
1022+ let downloader = Downloader :: with_config (
1023+ params,
1024+ vec ! [ DwcaExtension :: SimpleMultimedia ] ,
1025+ true ,
1026+ config,
1027+ ) ;
1028+
1029+ let result = downloader. execute ( output_path. to_str ( ) . unwrap ( ) , |_| { } , None ) . await ;
1030+ assert ! ( result. is_ok( ) , "Download should succeed: {:?}" , result. err( ) ) ;
1031+
1032+ // Opening the archive must not fail with "Duplicate filename"
1033+ let zip_data = std:: fs:: read ( & output_path) . unwrap ( ) ;
1034+ let mut zip = zip:: ZipArchive :: new ( std:: io:: Cursor :: new ( zip_data) )
1035+ . expect ( "ZipArchive::new should succeed — no duplicate entries" ) ;
1036+
1037+ let photo_entries: Vec < _ > = ( 0 ..zip. len ( ) )
1038+ . map ( |i| zip. by_index ( i) . unwrap ( ) . name ( ) . to_string ( ) )
1039+ . filter ( |n| n. contains ( "9999" ) )
1040+ . collect ( ) ;
1041+
1042+ assert_eq ! (
1043+ photo_entries. len( ) ,
1044+ 1 ,
1045+ "expected exactly one ZIP entry for photo 9999, got: {photo_entries:?}"
1046+ ) ;
1047+ }
0 commit comments