@@ -62,38 +62,49 @@ def merge_rows(
6262 )
6363
6464 if column_agreement :
65- column_values : dict [str , dict [str , int ]] = {}
66- for row in [left , right ]:
67- for column_name , column_value in normalize_row (row ).get_columns ().items ():
68- values = column_values .setdefault (column_name , {})
69- values_with_agreement = (
70- [ValueWithAgreement (value = column_value , agreement_level = 1 )]
71- if isinstance (column_value , str )
72- else column_value
73- )
74-
75- for value_with_agreement in values_with_agreement :
76- value = value_with_agreement .value
77- if value in values :
78- values [value ] += value_with_agreement .agreement_level
79- else :
80- values [value ] = value_with_agreement .agreement_level
81- columns = {
82- column_name : [
83- ValueWithAgreement (value = column_value , agreement_level = agreement_level )
84- for column_value , agreement_level in column_values .items ()
85- ]
86- for column_name , column_values in column_values .items ()
87- }
88-
65+ columns = merge_columns_with_agreement (left , right )
8966 else :
90- columns = {
91- ** normalize_row (left ).get_columns (),
92- ** normalize_row (right ).get_columns (),
93- }
67+ columns = merge_columns_without_agreement (left , right )
9468 return Row (agreement_level_ = agreement_level , ** columns )
9569
9670
def merge_columns_without_agreement(left: Row, right: Row):
    """Combine the columns of two rows into a single plain dict.

    Each row is passed through ``normalize_row`` and its ``get_columns()``
    mapping is read. The two mappings are then merged; when a column name
    occurs in both rows, the entry from ``right`` replaces the one from
    ``left``.
    """
    left_columns = normalize_row(left).get_columns()
    right_columns = normalize_row(right).get_columns()
    # Unpacking order matters: later entries (right) override earlier ones.
    return {**left_columns, **right_columns}
76+
77+
def merge_columns_with_agreement(
    left: Row, right: Row
) -> dict[str, list[ValueWithAgreement]]:
    """Merge the columns of two rows, summing agreement levels per value.

    Each row is passed through ``normalize_row``; every column value is then
    expanded with ``to_values_with_agreement`` into ``ValueWithAgreement``
    items. For each column name, the agreement levels of items that share the
    same ``value`` are added together across both rows.

    Returns:
        A dict mapping each column name to a list with one
        ``ValueWithAgreement`` per distinct value, carrying the summed
        agreement level. Values appear in the order they were first seen
        (``left`` before ``right``).
    """
    # column name -> {value -> accumulated agreement level}
    totals_by_column: dict[str, dict[str, int]] = {}
    for row in (left, right):
        for column_name, column_value in normalize_row(row).get_columns().items():
            # setdefault keeps a column present even if it yields no values.
            totals = totals_by_column.setdefault(column_name, {})
            for candidate in to_values_with_agreement(column_value):
                totals[candidate.value] = (
                    totals.get(candidate.value, 0) + candidate.agreement_level
                )

    # Distinct names are used for the accumulator and the loop targets so the
    # comprehension does not rely on shadowing the outer variable.
    return {
        column_name: [
            ValueWithAgreement(value=value, agreement_level=level)
            for value, level in totals.items()
        ]
        for column_name, totals in totals_by_column.items()
    }
98+
99+
def to_values_with_agreement(column_value: ColumnValue):
    """Return ``column_value`` as an iterable of ``ValueWithAgreement`` items.

    A plain string is wrapped in a one-element list holding a
    ``ValueWithAgreement`` with ``agreement_level=1``. Any non-string input is
    returned unchanged.

    NOTE(review): the non-string case is presumably already a list of
    ``ValueWithAgreement`` -- confirm against the ``ColumnValue`` definition.
    """
    if not isinstance(column_value, str):
        return column_value
    return [ValueWithAgreement(value=column_value, agreement_level=1)]
106+
107+
97108def merge_tablesfiles (
98109 tablesfiles : list [TablesFile ], row_agreement = False , column_agreement = False
99110) -> TablesFile :
0 commit comments