-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcli.py
More file actions
62 lines (46 loc) · 1.93 KB
/
cli.py
File metadata and controls
62 lines (46 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""Command-line entrypoint for GST schema standardization."""
from __future__ import annotations
import argparse
from gst_engine.mapper import SchemaMapper
from gst_engine.utils import get_logger, load_config, read_excel, write_excel
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the GST header standardization CLI.

    Options registered, in order:
        --input:  required; path to the input Excel file.
        --output: path to the output Excel file (default ``renamed.xlsx``).
        --config: path to the YAML config (default ``config.yaml``).
        --audit:  path for a JSONL audit log (default ``None``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # One (flag, add_argument keyword arguments) pair per option; the order
    # here is the order the options appear in ``--help``.
    option_specs = (
        ("--input", {"required": True, "help": "Path to input Excel file"}),
        ("--output", {"default": "renamed.xlsx", "help": "Path to output Excel file"}),
        ("--config", {"default": "config.yaml", "help": "Path to YAML config"}),
        ("--audit", {"default": None, "help": "Path to write JSONL audit log"}),
    )
    cli = argparse.ArgumentParser(description="Semantic GST header standardization")
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli
def main(argv: list[str] | None = None) -> None:
    """Run the GST header standardization pipeline from the command line.

    Steps, in order: parse the arguments, load the config, build a
    ``SchemaMapper`` from it, read the input Excel file, rename its columns,
    write the renamed data to the output path, log every mapping decision,
    warn about columns that were not mapped, and (only when ``--audit`` is
    given) write the audit log.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            ``argparse`` fall back to ``sys.argv[1:]``, so calling ``main()``
            with no arguments behaves exactly as before.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    logger = get_logger()

    config = load_config(args.config)
    # "model_name", "model_path" and "threshold" are read with [] and so must
    # be present; only "review_threshold" has a fallback (0.55).
    # NOTE(review): presumably ``config`` is a plain dict, in which case a
    # missing key raises KeyError - confirm against load_config.
    mapper = SchemaMapper(
        model_name=config["model_name"],
        model_path=config["model_path"],
        threshold=config["threshold"],
        review_threshold=config.get("review_threshold", 0.55),
    )

    dataframe = read_excel(args.input)
    renamed_df, results = mapper.rename_dataframe(dataframe)
    write_excel(renamed_df, args.output)
    logger.info("Column standardization complete. Output written to %s", args.output)

    # Report all mapping decisions, collecting the input columns whose status
    # says they did not end up mapped. Only the result objects are needed, so
    # iterate the values rather than (key, value) pairs.
    not_mapped_statuses = ("unmapped", "collision_dropped")
    unmapped_columns = []
    for result in results.values():
        logger.info(
            "%s -> %s (score=%.4f, status=%s)",
            result.input_column, result.top1, result.top1_score, result.status,
        )
        if result.status in not_mapped_statuses:
            unmapped_columns.append(result.input_column)

    if unmapped_columns:
        logger.warning(
            "%d column(s) were NOT mapped: %s",
            len(unmapped_columns),
            unmapped_columns,
        )

    if args.audit:
        mapper.write_audit_log(results, args.audit)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()