Release version 1.0.17

ivbeg · ivbeg · commit 762000cf82f2 · 2025-12-12T15:41:44.000+03:00
- Improved CLI documentation with detailed help text using Typer Annotated types
- Refactored analyzer output writing into separate function for better maintainability
- Fixed analyzer output file handling with proper context managers
- Updated CHANGELOG.md with release notes
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.0.17] - 2025-12-12
+
+### Changed
+- **Improved CLI documentation**: Enhanced all command-line interface functions with detailed help text using Typer's `Annotated` types
+- **Code refactoring**: Refactored analyzer output writing into separate `_write_analysis_output()` function for better maintainability
+- **Better file handling**: Improved file output handling in analyzer command with proper context managers
+
+### Fixed
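+- Fixed analyzer text report ignoring the `--output` option (it was always printed to stdout), and the text report being printed in addition to the JSON output when JSON format was selected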
+- Improved consistency between stdout and file output formatting
+
 ## [1.0.16] - 2025-12-12
 
 ### Added
@@ -116,7 +127,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - First public release on PyPI and updated github code
 
-[Unreleased]: https://github.com/datacoon/undatum/compare/v1.0.16...HEAD
+[Unreleased]: https://github.com/datacoon/undatum/compare/v1.0.17...HEAD
+[1.0.17]: https://github.com/datacoon/undatum/compare/v1.0.16...v1.0.17
 [1.0.16]: https://github.com/datacoon/undatum/compare/v1.0.15...v1.0.16
 [1.0.15]: https://github.com/datacoon/undatum/compare/v1.0.14...v1.0.15
 [1.0.14]: https://github.com/datacoon/undatum/compare/v1.0.13...v1.0.14
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "undatum"
-version = "1.0.16"
+version = "1.0.17"
 description = "A powerful command-line tool for data processing and analysis"
 readme = "README.md"
 requires-python = ">=3.8"
diff --git a/undatum/__init__.py b/undatum/__init__.py
@@ -4,6 +4,6 @@
 
 """
 
-__version__ = "1.0.16"
+__version__ = "1.0.17"
 __author__ = 'Ivan Begtin'
 __licence__ = 'MIT'
diff --git a/undatum/cmds/analyzer.py b/undatum/cmds/analyzer.py
@@ -11,6 +11,7 @@
 import io
 import json
 import os
+import sys
 import tempfile
 from collections import OrderedDict
 from typing import Optional
@@ -586,6 +587,88 @@ def _format_number(num):
     return f"{num:,}"
 
 
+def _write_analysis_output(report, options, output_stream):
+    """Write analysis report to output stream in the specified format."""
+    from tabulate import tabulate
+    
+    if options['outtype'] == 'json':
+        json_output = json.dumps(report.model_dump(), indent=4, ensure_ascii=False)
+        output_stream.write(json_output)
+        output_stream.write('\n')
+    elif options['outtype'] == 'yaml':
+        yaml_output = yaml.dump(report.model_dump(), Dumper=yaml.Dumper)
+        output_stream.write(yaml_output)
+    elif options['outtype'] == 'markdown':
+        raise NotImplementedError("Markdown output not implemented")
+    else:
+        # Text output format
+        # Print header
+        print("=" * 70, file=output_stream)
+        print("ANALYSIS REPORT", file=output_stream)
+        print("=" * 70, file=output_stream)
+        print(file=output_stream)
+        
+        # File information section
+        print("File Information", file=output_stream)
+        print("-" * 70, file=output_stream)
+        headers = ['Attribute', 'Value']
+        reptable = []
+        reptable.append(['Filename', str(report.filename)])
+        reptable.append(['File size', _format_file_size(report.file_size)])
+        reptable.append(['File type', report.file_type or 'N/A'])
+        reptable.append(['Compression', str(report.compression) if report.compression else 'None'])
+        reptable.append(['Total tables', _format_number(report.total_tables)])
+        reptable.append(['Total records', _format_number(report.total_records)])
+        for k, v in report.metadata.items():
+            reptable.append([k.replace('_', ' ').title(), str(v)])
+        print(tabulate(reptable, headers=headers, tablefmt='grid'), file=output_stream)
+        print(file=output_stream)
+
+        # Tables section
+        if report.tables:
+            print("=" * 70, file=output_stream)
+            print("TABLE STRUCTURES", file=output_stream)
+            print("=" * 70, file=output_stream)
+            print(file=output_stream)
+            
+            tabheaders = ['Field Name', 'Type', 'Is Array', 'Description']
+            for idx, rtable in enumerate(report.tables, 1):
+                if len(report.tables) > 1:
+                    print(f"Table {idx}: {rtable.id}", file=output_stream)
+                else:
+                    print(f"Table: {rtable.id}", file=output_stream)
+                print("-" * 70, file=output_stream)
+                print(f"  Records: {_format_number(rtable.num_records)}", file=output_stream)
+                print(f"  Columns: {_format_number(rtable.num_cols)}", file=output_stream)
+                print(f"  Structure: {'Flat' if rtable.is_flat else 'Nested'}", file=output_stream)
+                print(file=output_stream)
+                
+                table = []
+                for field in rtable.fields:
+                    desc = field.description if field.description else '-'
+                    table.append([
+                        field.name,
+                        field.ftype,
+                        'Yes' if field.is_array else 'No',
+                        desc
+                    ])
+                print(tabulate(table, headers=tabheaders, tablefmt='grid'), file=output_stream)
+                
+                if rtable.description:
+                    print(file=output_stream)
+                    print("Summary:", file=output_stream)
+                    print("-" * 70, file=output_stream)
+                    # Print each non-blank description line, stripped and indented by two spaces
+                    desc_lines = rtable.description.split('\n')
+                    for line in desc_lines:
+                        if line.strip():
+                            print(f"  {line.strip()}", file=output_stream)
+                
+                if idx < len(report.tables):
+                    print(file=output_stream)
+                    print(file=output_stream)
+
+
 class Analyzer:
     """Data analysis handler."""
     def __init__(self):
@@ -594,94 +677,21 @@ def __init__(self):
 
     def analyze(self, filename, options):
         """Analyzes given data file and returns it's parameters"""
-        from tabulate import tabulate
-
-        table = None
         encoding = options.get('encoding')
         report = analyze(filename, encoding=encoding,
                         engine=options['engine'],
                         use_pandas=options['use_pandas'],
                         autodoc=options['autodoc'], lang=options['lang'],
                         ai_provider=options.get('ai_provider'),
                         ai_config=options.get('ai_config'))
-        if options['outtype'] == 'json':
-            if options['output'] is not None:
-                with open(options['output'], 'w', encoding='utf8') as f:
-                    f.write(json.dumps(report.model_dump()))
-            else:
-                print(json.dumps(report.model_dump(), indent=4, ensure_ascii=False))
-        if options['outtype'] == 'yaml':
-            if options['output'] is not None:
-                with open(options['output'], 'w', encoding='utf8') as f:
-                    f.write(yaml.dump(report.model_dump(), Dumper=yaml.Dumper))
-            else:
-                print(yaml.dump(report.model_dump(), Dumper=yaml.Dumper))
-
-        elif options['outtype'] == 'markdown':
-            raise NotImplementedError("Markdown output not implemented")
+        
+        # Determine output destination
+        output_file = options.get('output')
+        
+        if output_file:
+            # Use context manager for file output
+            with open(output_file, 'w', encoding='utf8') as output_stream:
+                _write_analysis_output(report, options, output_stream)
         else:
-            # Print header
-            print("=" * 70)
-            print("ANALYSIS REPORT")
-            print("=" * 70)
-            print()
-            
-            # File information section
-            print("File Information")
-            print("-" * 70)
-            headers = ['Attribute', 'Value']
-            reptable = []
-            reptable.append(['Filename', str(report.filename)])
-            reptable.append(['File size', _format_file_size(report.file_size)])
-            reptable.append(['File type', report.file_type or 'N/A'])
-            reptable.append(['Compression', str(report.compression) if report.compression else 'None'])
-            reptable.append(['Total tables', _format_number(report.total_tables)])
-            reptable.append(['Total records', _format_number(report.total_records)])
-            for k, v in report.metadata.items():
-                reptable.append([k.replace('_', ' ').title(), str(v)])
-            print(tabulate(reptable, headers=headers, tablefmt='grid'))
-            print()
-
-            # Tables section
-            if report.tables:
-                print("=" * 70)
-                print("TABLE STRUCTURES")
-                print("=" * 70)
-                print()
-                
-                tabheaders = ['Field Name', 'Type', 'Is Array', 'Description']
-                for idx, rtable in enumerate(report.tables, 1):
-                    if len(report.tables) > 1:
-                        print(f"Table {idx}: {rtable.id}")
-                    else:
-                        print(f"Table: {rtable.id}")
-                    print("-" * 70)
-                    print(f"  Records: {_format_number(rtable.num_records)}")
-                    print(f"  Columns: {_format_number(rtable.num_cols)}")
-                    print(f"  Structure: {'Flat' if rtable.is_flat else 'Nested'}")
-                    print()
-                    
-                    table = []
-                    for field in rtable.fields:
-                        desc = field.description if field.description else '-'
-                        table.append([
-                            field.name,
-                            field.ftype,
-                            'Yes' if field.is_array else 'No',
-                            desc
-                        ])
-                    print(tabulate(table, headers=tabheaders, tablefmt='grid'))
-                    
-                    if rtable.description:
-                        print()
-                        print("Summary:")
-                        print("-" * 70)
-                        # Wrap description text for better readability
-                        desc_lines = rtable.description.split('\n')
-                        for line in desc_lines:
-                            if line.strip():
-                                print(f"  {line.strip()}")
-                    
-                    if idx < len(report.tables):
-                        print()
-                        print()
+            # Write to stdout
+            _write_analysis_output(report, options, sys.stdout)
diff --git a/undatum/core.py b/undatum/core.py