@@ -155,11 +155,12 @@ def test_read_table_with_schema_that_matches_page():
155155 tables = [
156156 TableSchema (
157157 title = "Plants" ,
158+ header_mode = "all_pages" ,
158159 first_page = 1 ,
159160 last_page = 1 ,
160161 column_mappings = {
161- "0" : "vernacular_name" ,
162- "1" : "scientific_name" ,
162+ 0 : "vernacular_name" ,
163+ 1 : "scientific_name" ,
163164 },
164165 )
165166 ],
@@ -216,3 +217,102 @@ def test_read_table_with_schema_that_matches_page():
216217 result_dict = result .to_dict ()
217218 assert result_dict ["metadata" ] == {"filename" : "demo_table.pdf" }
218219 assert len (result_dict ["tables" ][0 ]["table_fragments" ]) == 1
220+
221+
def test_read_table_with_schema_without_headers():
    """Read page 1 of the demo PDF with ``header_mode="none"``.

    The schema maps column 0 to ``vernacular_name`` and column 1 to
    ``scientific_name``. The expected rows start with what appears to be the
    table's own header line (``common_name`` / ``scienti..c_name``), followed
    by ten plant rows, all reported as a single table on page 1.
    """
    plants_schema = TableSchema(
        title="Plants",
        header_mode="none",
        first_page=1,
        last_page=1,
        column_mappings={
            0: "vernacular_name",
            1: "scientific_name",
        },
    )
    result = read_tables(
        "./tests/data/demo_table.pdf",
        schema=TablesSchema(tables=[plants_schema], citation="A citation"),
    )

    # (vernacular_name, scientific_name) pairs, in the expected row order.
    # The "\x00" escapes are part of the expected extracted text.
    # NOTE(review): presumably they stand in for ligature glyphs (fi/fl/ffi)
    # that the PDF text layer does not decode -- confirm against the fixture,
    # including whether the space following each "\x00" is really present.
    expected_pairs = [
        ("common_name", "scienti\x00 c_name"),
        ("Sun\x00 ower", "Helianthus annuus"),
        ("Rose", "Rosa gallica"),
        ("Tulip", "Tulipa gesneriana"),
        ("Lavender", "Lavandula angustifolia"),
        ("Oak", "Quercus robur"),
        ("Maple", "Acer saccharum"),
        ("Dandelion", "Taraxacum o\x00 cinale"),
        ("Bamboo", "Bambusa vulgaris"),
        ("Cactus (Prickly Pear)", "Opuntia \x00 cus-indica"),
        ("Coffee", "Coffea arabica"),
    ]
    expected_rows = [
        {"vernacular_name": vernacular, "scientific_name": scientific}
        for vernacular, scientific in expected_pairs
    ]

    assert result.citation == "A citation"
    assert len(result.tables) == 1

    table = result.tables[0]
    assert table.title == "Plants"
    assert table.page == 1
    assert table.rows == expected_rows

    # The dict export carries the source filename and one fragment per table.
    result_dict = result.to_dict()
    assert result_dict["metadata"] == {"filename": "demo_table.pdf"}
    assert len(result_dict["tables"][0]["table_fragments"]) == 1
295+
296+
def test_read_table_with_schema_that_doesnt_matches_page():
    """A schema restricted to page 2 of the demo PDF yields no tables.

    The schema's ``first_page`` and ``last_page`` are both 2. The test expects
    the citation to be carried through while ``result.tables`` is empty.
    """
    schema = TablesSchema(
        tables=[
            TableSchema(
                title="Plants",
                header_mode="all_pages",
                first_page=2,
                last_page=2,
                column_mappings={
                    0: "vernacular_name",
                    1: "scientific_name",
                },
            )
        ],
        citation="A citation",
    )

    result = read_tables("./tests/data/demo_table.pdf", schema=schema)

    assert result.citation == "A citation"
    assert len(result.tables) == 0
0 commit comments