@@ -194,12 +194,13 @@ def test_extract_best_hits(tmp_dir, input_fasta, blast_output_content):
194194 assert isinstance (results , pd .DataFrame )
195195 assert len (results ) == 2 # Should have one row per unique probe
196196 expected_columns = [
197- "probe_seq" , "target_seq" , "target_seq_reverse_complement" ,
197+ "probe_id" , "probe_seq" , "target_seq" , "target_seq_reverse_complement" ,
198198 "vienna_rna_mfe" , "dna_dna_duplex_dg" , "dna_rna_duplex_dg" , "rna_rna_duplex_dg"
199199 ]
200200 for col in expected_columns :
201201 assert col in results .columns
202- assert "probe1" in results ["probe_seq" ].values or "AUGCAUGCAUGC" in results ["probe_seq" ].values
202+ assert "probe1" in results ["probe_id" ].values
203+ assert "AUGCAUGCAUGC" in results ["probe_seq" ].values
203204 # Verify reverse complement column is present and not empty
204205 assert all (results ["target_seq_reverse_complement" ].str .len () > 0 )
205206
@@ -215,7 +216,7 @@ def test_extract_best_hits_empty_file(tmp_dir, input_fasta):
215216 assert isinstance (results , pd .DataFrame )
216217 assert len (results ) == 0
217218 expected_columns = [
218- "probe_seq" , "target_seq" , "target_seq_reverse_complement" ,
219+ "probe_id" , "probe_seq" , "target_seq" , "target_seq_reverse_complement" ,
219220 "vienna_rna_mfe" , "dna_dna_duplex_dg" , "dna_rna_duplex_dg" , "rna_rna_duplex_dg"
220221 ]
221222 assert list (results .columns ) == expected_columns
@@ -230,7 +231,7 @@ def test_extract_best_hits_nonexistent_file(tmp_dir, input_fasta):
230231 assert isinstance (results , pd .DataFrame )
231232 assert len (results ) == 0
232233 expected_columns = [
233- "probe_seq" , "target_seq" , "target_seq_reverse_complement" ,
234+ "probe_id" , "probe_seq" , "target_seq" , "target_seq_reverse_complement" ,
234235 "vienna_rna_mfe" , "dna_dna_duplex_dg" , "dna_rna_duplex_dg" , "rna_rna_duplex_dg"
235236 ]
236237 assert list (results .columns ) == expected_columns
@@ -262,6 +263,7 @@ def test_run_modeling(
262263 mock_create_visualizations .return_value = os .path .join (tmp_dir , "visualizations" )
263264
264265 mock_df = pd .DataFrame ({
266+ "probe_id" : ["probe1" , "probe2" ],
265267 "probe_seq" : ["ATGCATGCATGC" , "GCTAGCTAGCTA" ],
266268 "target_seq" : ["ATGCATGCATGCATGCATGCATGC" , "GCTAGCTAGCTAGCTAGCTAGCTA" ]
267269 })
@@ -309,6 +311,7 @@ def test_integration_modeling(tmp_dir, input_fasta, output_fasta, mock_args):
309311
310312 assert isinstance (results , pd .DataFrame )
311313 assert len (results ) > 0
314+ assert "probe_id" in results .columns
312315 assert "probe_seq" in results .columns
313316 assert "target_seq" in results .columns
314317
0 commit comments