1111import io
1212import json
1313import os
14+ import sys
1415import tempfile
1516from collections import OrderedDict
1617from typing import Optional
@@ -586,6 +587,88 @@ def _format_number(num):
586587 return f"{ num :,} "
587588
588589
def _write_analysis_output(report, options, output_stream):
    """Write an analysis report to a stream in the requested format.

    Args:
        report: Analysis result. The JSON and YAML formats call its
            ``model_dump()`` method; the text format reads its attributes
            (``filename``, ``file_size``, ``tables``, ...) directly.
        options: Mapping with a required ``'outtype'`` key. ``'json'`` and
            ``'yaml'`` select the machine-readable formats; any value other
            than those and ``'markdown'`` selects the plain-text report.
        output_stream: Writable text stream that receives the report.

    Raises:
        NotImplementedError: If ``options['outtype']`` is ``'markdown'``.
        KeyError: If ``options`` has no ``'outtype'`` key.
    """
    outtype = options['outtype']
    if outtype == 'json':
        serialized = json.dumps(report.model_dump(), indent=4, ensure_ascii=False)
        output_stream.write(serialized)
        # json.dumps does not terminate its output with a newline.
        output_stream.write('\n')
    elif outtype == 'yaml':
        output_stream.write(yaml.dump(report.model_dump(), Dumper=yaml.Dumper))
    elif outtype == 'markdown':
        raise NotImplementedError("Markdown output not implemented")
    else:
        _write_text_analysis(report, output_stream)


def _write_text_analysis(report, output_stream):
    """Render the plain-text report: a file summary, then one section per table."""
    # Imported here rather than in the dispatcher so that the JSON and YAML
    # formats do not depend on tabulate being importable.
    from tabulate import tabulate

    heavy_rule = "=" * 70
    light_rule = "-" * 70

    def emit(text=""):
        print(text, file=output_stream)

    # Report header
    emit(heavy_rule)
    emit("ANALYSIS REPORT")
    emit(heavy_rule)
    emit()

    # File information section
    emit("File Information")
    emit(light_rule)
    info_rows = [
        ['Filename', str(report.filename)],
        ['File size', _format_file_size(report.file_size)],
        ['File type', report.file_type or 'N/A'],
        ['Compression', str(report.compression) if report.compression else 'None'],
        ['Total tables', _format_number(report.total_tables)],
        ['Total records', _format_number(report.total_records)],
    ]
    # Underscores in metadata keys become spaces and the label is title-cased.
    # A missing (None) metadata mapping is treated as empty.
    for key, value in (report.metadata or {}).items():
        info_rows.append([key.replace('_', ' ').title(), str(value)])
    emit(tabulate(info_rows, headers=['Attribute', 'Value'], tablefmt='grid'))
    emit()

    if not report.tables:
        return

    # Tables section
    emit(heavy_rule)
    emit("TABLE STRUCTURES")
    emit(heavy_rule)
    emit()

    field_headers = ['Field Name', 'Type', 'Is Array', 'Description']
    table_count = len(report.tables)
    for idx, rtable in enumerate(report.tables, 1):
        # Tables are numbered only when the report holds more than one.
        if table_count > 1:
            emit(f"Table {idx}: {rtable.id}")
        else:
            emit(f"Table: {rtable.id}")
        emit(light_rule)
        emit(f" Records: {_format_number(rtable.num_records)}")
        emit(f" Columns: {_format_number(rtable.num_cols)}")
        emit(f" Structure: {'Flat' if rtable.is_flat else 'Nested'}")
        emit()

        field_rows = [
            [
                field.name,
                field.ftype,
                'Yes' if field.is_array else 'No',
                field.description or '-',
            ]
            for field in rtable.fields
        ]
        emit(tabulate(field_rows, headers=field_headers, tablefmt='grid'))

        if rtable.description:
            emit()
            emit("Summary:")
            emit(light_rule)
            # Print each non-blank description line, stripped and indented.
            for line in rtable.description.split('\n'):
                stripped = line.strip()
                if stripped:
                    emit(f" {stripped}")

        # Two blank lines separate consecutive tables; none after the last.
        if idx < table_count:
            emit()
            emit()
671+
589672class Analyzer :
590673 """Data analysis handler."""
591674 def __init__ (self ):
@@ -594,94 +677,21 @@ def __init__(self):
594677
595678 def analyze (self , filename , options ):
596679 """Analyzes given data file and returns it's parameters"""
597- from tabulate import tabulate
598-
599- table = None
600680 encoding = options .get ('encoding' )
601681 report = analyze (filename , encoding = encoding ,
602682 engine = options ['engine' ],
603683 use_pandas = options ['use_pandas' ],
604684 autodoc = options ['autodoc' ], lang = options ['lang' ],
605685 ai_provider = options .get ('ai_provider' ),
606686 ai_config = options .get ('ai_config' ))
607- if options ['outtype' ] == 'json' :
608- if options ['output' ] is not None :
609- with open (options ['output' ], 'w' , encoding = 'utf8' ) as f :
610- f .write (json .dumps (report .model_dump ()))
611- else :
612- print (json .dumps (report .model_dump (), indent = 4 , ensure_ascii = False ))
613- if options ['outtype' ] == 'yaml' :
614- if options ['output' ] is not None :
615- with open (options ['output' ], 'w' , encoding = 'utf8' ) as f :
616- f .write (yaml .dump (report .model_dump (), Dumper = yaml .Dumper ))
617- else :
618- print (yaml .dump (report .model_dump (), Dumper = yaml .Dumper ))
619-
620- elif options ['outtype' ] == 'markdown' :
621- raise NotImplementedError ("Markdown output not implemented" )
687+
688+ # Determine output destination
689+ output_file = options .get ('output' )
690+
691+ if output_file :
692+ # Use context manager for file output
693+ with open (output_file , 'w' , encoding = 'utf8' ) as output_stream :
694+ _write_analysis_output (report , options , output_stream )
622695 else :
623- # Print header
624- print ("=" * 70 )
625- print ("ANALYSIS REPORT" )
626- print ("=" * 70 )
627- print ()
628-
629- # File information section
630- print ("File Information" )
631- print ("-" * 70 )
632- headers = ['Attribute' , 'Value' ]
633- reptable = []
634- reptable .append (['Filename' , str (report .filename )])
635- reptable .append (['File size' , _format_file_size (report .file_size )])
636- reptable .append (['File type' , report .file_type or 'N/A' ])
637- reptable .append (['Compression' , str (report .compression ) if report .compression else 'None' ])
638- reptable .append (['Total tables' , _format_number (report .total_tables )])
639- reptable .append (['Total records' , _format_number (report .total_records )])
640- for k , v in report .metadata .items ():
641- reptable .append ([k .replace ('_' , ' ' ).title (), str (v )])
642- print (tabulate (reptable , headers = headers , tablefmt = 'grid' ))
643- print ()
644-
645- # Tables section
646- if report .tables :
647- print ("=" * 70 )
648- print ("TABLE STRUCTURES" )
649- print ("=" * 70 )
650- print ()
651-
652- tabheaders = ['Field Name' , 'Type' , 'Is Array' , 'Description' ]
653- for idx , rtable in enumerate (report .tables , 1 ):
654- if len (report .tables ) > 1 :
655- print (f"Table { idx } : { rtable .id } " )
656- else :
657- print (f"Table: { rtable .id } " )
658- print ("-" * 70 )
659- print (f" Records: { _format_number (rtable .num_records )} " )
660- print (f" Columns: { _format_number (rtable .num_cols )} " )
661- print (f" Structure: { 'Flat' if rtable .is_flat else 'Nested' } " )
662- print ()
663-
664- table = []
665- for field in rtable .fields :
666- desc = field .description if field .description else '-'
667- table .append ([
668- field .name ,
669- field .ftype ,
670- 'Yes' if field .is_array else 'No' ,
671- desc
672- ])
673- print (tabulate (table , headers = tabheaders , tablefmt = 'grid' ))
674-
675- if rtable .description :
676- print ()
677- print ("Summary:" )
678- print ("-" * 70 )
679- # Wrap description text for better readability
680- desc_lines = rtable .description .split ('\n ' )
681- for line in desc_lines :
682- if line .strip ():
683- print (f" { line .strip ()} " )
684-
685- if idx < len (report .tables ):
686- print ()
687- print ()
696+ # Write to stdout
697+ _write_analysis_output (report , options , sys .stdout )
0 commit comments