# Build, test, and publish the package to PyPI whenever a tag matching v* is pushed.
name: Publish to PyPI

on:
  push:
    tags: ["v*"]

# Default for every job: read-only access to the repository so that
# actions/checkout works. Jobs that need more declare it themselves.
permissions:
  contents: read

jobs:
  # Build the sdist/wheel once and hand it to the publish job as an artifact.
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: pip install build

      - name: Build package
        run: python -m build

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # Same lint / type-check / test gate that CI runs, repeated on the tagged commit.
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install and test
        run: |
          pip install .[dev]
          ruff check .
          mypy data_hygiene_auditor/
          pytest -v

  # Upload the artifact produced by `build`; runs only if build and test both succeed.
  publish:
    needs: [build, test]
    runs-on: ubuntu-latest
    environment: pypi
    # The OIDC token permission is granted to this job only, so the jobs that
    # install and execute project code (build, test) cannot request a token.
    permissions:
      id-token: write
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - uses: pypa/gh-action-pypi-publish@release/v1
- GitHub Action (`.github/actions/audit/action.yml`) - `--version` / `-V` flag - `--quiet` / `-q` flag to suppress terminal output +- `--export-fixes` flag: export remediation plan as CSV (sorted by severity, with fix code and assignee columns) - `--force` flag to override the 2M row safety limit - `count_issues()` shared helper for consistent issue counting - Warning when fuzzy (Levenshtein) matching is skipped due to row count @@ -33,6 +34,10 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - Minimum Python version raised from 3.8 to 3.9 +- mypy type checking added to CI (public API and rules module strictly typed) +- PyPI classifiers expanded (license, Python versions, `Typing :: Typed`) +- Automated PyPI publish workflow (push `v*` tag → build → test → publish) +- README refreshed with "data linter" positioning and quick-start install ## [1.0.0] - 2026-05-09 diff --git a/README.md b/README.md index 3810d4d..8d0b15a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,15 @@ # Data Hygiene Auditor +**A linter for your data.** Point it at a spreadsheet, get back every inconsistency, placeholder, and hidden duplicate — with severity ratings, root causes, and fix code. + +``` +pip install data-hygiene-auditor +data-hygiene-audit --input customers.xlsx --output ./reports +``` + Phone numbers stored seven different ways in the same column. "TBD" sitting in a status field for three years. A customer record that looks unique until you notice that whitespace and casing are the only things separating it from four others. These are the issues consultants inherit when they take over someone else's spreadsheet — and the ones nobody finds until they're already in production. 
-The Data Hygiene Auditor is a Python CLI that scans Excel workbooks for the specific real-world failure modes that show up in actual consulting engagements: mixed-format inconsistencies, fields used for the wrong purpose, placeholder values that escaped into production, and phantom duplicates hiding behind cosmetic differences. +The Data Hygiene Auditor scans Excel, CSV, and TSV files for the specific real-world failure modes that show up in actual consulting engagements: mixed-format inconsistencies, fields used for the wrong purpose, placeholder values that escaped into production, and phantom duplicates hiding behind cosmetic differences. A single run produces three reports tailored to three audiences: an **HTML report** for the stakeholder meeting, an **Excel findings file** for the person doing the cleanup, and a **PDF** for the deliverable folder. @@ -73,13 +80,13 @@ python audit.py --input samples/input/sample_messy_data.xlsx --output samples/ou ## Installation ``` -pip install . +pip install data-hygiene-auditor ``` -Or install dependencies directly: +Or install from source: ``` -pip install -r requirements.txt +pip install . ``` ## Usage @@ -109,6 +116,7 @@ Supports `.xlsx`, `.xls`, `.csv`, and `.tsv` files. | `--baseline`, `-b` | Path to a previous audit JSON for trend comparison (shows deltas) | | `--rules`, `-r` | Path to custom rules JSON for additional checks | | `--sarif` | Output findings in SARIF format (for GitHub Code Scanning) | +| `--export-fixes` | Export remediation plan as CSV (sorted by severity, with fix code) | | `--fail-under` | Exit with code 1 if health score is below this threshold (0-100) | | `--quiet`, `-q` | Suppress all terminal output (just write report files) | | `--force` | Process files exceeding the 2M row safety limit | @@ -290,6 +298,20 @@ python generate_sample.py - openpyxl - reportlab +## Releasing + +To publish a new version to PyPI: + +1. Update `version` in `pyproject.toml` +2. 
Add a release entry to `CHANGELOG.md` +3. Commit, tag, and push: + ``` + git tag v1.1.0 + git push origin v1.1.0 + ``` + +The `publish.yml` workflow builds, tests, and uploads to PyPI automatically on version tags. + ## License MIT — see [LICENSE](LICENSE) diff --git a/data_hygiene_auditor/api.py b/data_hygiene_auditor/api.py index c4d1ca0..bcac2fb 100644 --- a/data_hygiene_auditor/api.py +++ b/data_hygiene_auditor/api.py @@ -14,9 +14,10 @@ from __future__ import annotations +import dataclasses import os import tempfile -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional @@ -42,7 +43,7 @@ class Finding: severity: str description: str why: str - detail: Dict[str, Any] = field(default_factory=dict) + detail: Dict[str, Any] = dataclasses.field(default_factory=dict) fix: Optional[FixSuggestion] = None @property @@ -67,7 +68,7 @@ class Duplicate: rows: List[int] group_size: int why: str - sample_data: List[Dict[str, str]] = field(default_factory=list) + sample_data: List[Dict[str, str]] = dataclasses.field(default_factory=list) fix: Optional[FixSuggestion] = None @@ -80,8 +81,8 @@ class FuzzyDuplicate: rows: List[int] group_size: int why: str - field_differences: Dict[str, Any] = field(default_factory=dict) - sample_data: List[Dict[str, str]] = field(default_factory=list) + field_differences: Dict[str, Any] = dataclasses.field(default_factory=dict) + sample_data: List[Dict[str, str]] = dataclasses.field(default_factory=list) similarity_threshold: Optional[float] = None fix: Optional[FixSuggestion] = None @@ -115,7 +116,7 @@ class FieldResult: total_missing: int missing_pct: float total_rows: int - findings: List[Finding] = field(default_factory=list) + findings: List[Finding] = dataclasses.field(default_factory=list) profile: Optional[ColumnProfile] = None @@ -127,10 +128,10 @@ class SheetResult: row_count: int col_count: int health_score: int - fields: List[FieldResult] = 
field(default_factory=list) - duplicates: List[Duplicate] = field(default_factory=list) - fuzzy_duplicates: List[FuzzyDuplicate] = field(default_factory=list) - schema_violations: List[SchemaViolation] = field(default_factory=list) + fields: List[FieldResult] = dataclasses.field(default_factory=list) + duplicates: List[Duplicate] = dataclasses.field(default_factory=list) + fuzzy_duplicates: List[FuzzyDuplicate] = dataclasses.field(default_factory=list) + schema_violations: List[SchemaViolation] = dataclasses.field(default_factory=list) @property def findings(self) -> List[Finding]: @@ -157,9 +158,9 @@ class AuditResult: input_file: str audit_timestamp: str overall_score: int - sheets: List[SheetResult] = field(default_factory=list) + sheets: List[SheetResult] = dataclasses.field(default_factory=list) trend: Optional[TrendData] = None - _raw: Dict[str, Any] = field(default_factory=dict, repr=False) + _raw: Dict[str, Any] = dataclasses.field(default_factory=dict, repr=False) @property def total_issues(self) -> int: @@ -231,7 +232,7 @@ class SchemaViolation: severity: str column: str why: str - detail: Dict[str, Any] = field(default_factory=dict) + detail: Dict[str, Any] = dataclasses.field(default_factory=dict) @dataclass @@ -244,8 +245,8 @@ class TrendData: overall_score_previous: int total_issues_delta: int total_issues_previous: int - severity_deltas: Dict[str, int] = field(default_factory=dict) - sheets: Dict[str, Any] = field(default_factory=dict) + severity_deltas: Dict[str, int] = dataclasses.field(default_factory=dict) + sheets: Dict[str, Any] = dataclasses.field(default_factory=dict) def _describe_issue(issue_type: str, detail: dict) -> str: @@ -257,7 +258,7 @@ def _describe_issue(issue_type: str, detail: dict) -> str: f" deviate from {detail.get('dominant_format', '')}" ) if issue_type == 'wrong_purpose': - return detail.get('issue', 'Wrong purpose') + return str(detail.get('issue', 'Wrong purpose')) if issue_type in ('placeholder_value', 'placeholder'): 
# Column order of the remediation CSV.
_REMEDIATION_COLUMNS = [
    'File', 'Sheet', 'Field', 'Issue Type', 'Severity',
    'Description', 'Fix Strategy', 'Fix Code',
    'Assigned To', 'Status',
]

# Sort rank per severity; anything not listed sorts after 'Low'.
_SEVERITY_ORDER = {'High': 0, 'Medium': 1, 'Low': 2}

# Leading characters that spreadsheet applications treat as the start of a formula.
_FORMULA_TRIGGERS = ('=', '+', '-', '@', '\t', '\r')


def _csv_safe(value):
    """Return *value* as text that a spreadsheet will not evaluate as a formula."""
    text = '' if value is None else str(value)
    if text.startswith(_FORMULA_TRIGGERS):
        # A leading apostrophe makes spreadsheet apps display the cell as literal text.
        return "'" + text
    return text


def _issue_description(detail):
    """Return the 'message' (or, failing that, 'issue') text of a field-issue detail."""
    if not isinstance(detail, dict):
        return ''
    return detail.get('message') or detail.get('issue') or ''


def _duplicate_description(group):
    """Summarise a duplicate group as its size plus its first five row numbers."""
    shown = ', '.join(str(r) for r in group['rows'][:5])
    return f"{group['group_size']} rows: {shown}"


def _remediation_row(source_file, sheet_name, field_name, issue_type,
                     severity, description, fix):
    """Build one CSV row; *fix* may be a dict, empty, or None."""
    fix = fix if isinstance(fix, dict) else {}
    return {
        'File': _csv_safe(source_file),
        'Sheet': _csv_safe(sheet_name),
        'Field': _csv_safe(field_name),
        'Issue Type': _csv_safe(issue_type),
        'Severity': _csv_safe(severity),
        'Description': _csv_safe(description),
        'Fix Strategy': _csv_safe(fix.get('strategy', '')),
        'Fix Code': _csv_safe(fix.get('code', '')),
        'Assigned To': '',
        'Status': 'Open',
    }


def _export_remediation_csv(all_results, output_path):
    """Export a CSV remediation plan with one row per reported issue.

    Rows are collected from every field issue, phantom-duplicate group and
    fuzzy-duplicate group in *all_results*, then sorted High -> Medium -> Low
    (other severities last). The sort is stable, so rows of equal severity
    keep their discovery order.

    Args:
        all_results: Iterable of audit result dicts. Each must have a
            'sheets' mapping whose values have a 'fields' mapping; each
            field entry must have an 'issues' list.
        output_path: Destination path of the CSV file (written as UTF-8).

    Raises:
        KeyError: If a result lacks 'sheets'/'fields'/'issues', an issue
            lacks 'severity', or an issue has neither 'rule_name' nor 'type'.
        OSError: If *output_path* cannot be opened for writing.
    """
    import csv

    rows = []
    for results in all_results:
        source_file = results.get('input_file', '')
        for sheet_name, sheet_data in results['sheets'].items():
            for col_name, field_data in sheet_data['fields'].items():
                for issue in field_data['issues']:
                    rows.append(_remediation_row(
                        source_file, sheet_name, col_name,
                        # Fall back to 'type' lazily so an issue carrying only
                        # 'rule_name' does not raise KeyError.
                        issue.get('rule_name') or issue['type'],
                        issue['severity'],
                        _issue_description(issue.get('detail', {})),
                        issue.get('fix'),
                    ))

            for dup in sheet_data.get('phantom_duplicates', []):
                rows.append(_remediation_row(
                    source_file, sheet_name, '(row-level)',
                    dup['type'], dup['severity'],
                    _duplicate_description(dup), dup.get('fix'),
                ))

            for fuzz in sheet_data.get('fuzzy_duplicates', []):
                rows.append(_remediation_row(
                    source_file, sheet_name, '(row-level)',
                    'fuzzy_duplicate', fuzz['severity'],
                    _duplicate_description(fuzz), fuzz.get('fix'),
                ))

    rows.sort(key=lambda row: _SEVERITY_ORDER.get(row['Severity'], 3))

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=_REMEDIATION_COLUMNS)
        writer.writeheader()
        writer.writerows(rows)
""" from collections import Counter - totals = Counter() + totals: Counter[str] = Counter() schema_count = 0 for sheet in results['sheets'].values(): for field_data in sheet['fields'].values(): @@ -145,7 +145,7 @@ def run_audit(input_path, fuzzy_threshold=0.85, schema_path=None, baseline_path= if df.empty: continue - sheet_results = { + sheet_results: dict = { 'row_count': len(df), 'col_count': len(df.columns), 'fields': {}, diff --git a/data_hygiene_auditor/detection.py b/data_hygiene_auditor/detection.py index 8dfd7f3..8ee568c 100644 --- a/data_hygiene_auditor/detection.py +++ b/data_hygiene_auditor/detection.py @@ -195,7 +195,7 @@ def analyze_mixed_formats(series, field_type): def analyze_wrong_purpose(series, col_name, field_type): """Detect fields being used for the wrong purpose.""" - findings = [] + findings: list[dict] = [] str_vals = series.dropna().astype(str).str.strip() non_null = str_vals[str_vals != ''] if len(non_null) == 0: @@ -245,7 +245,7 @@ def analyze_wrong_purpose(series, col_name, field_type): if field_type == 'id': alpha_mask = non_null.str.match(r'^[A-Za-z]+-\d+$', na=False) bare_mask = non_null.str.match(r'^\d+$', na=False) - type_counts = Counter() + type_counts: Counter[str] = Counter() alpha_count = int(alpha_mask.sum()) bare_count = int(bare_mask.sum()) other_count = len(non_null) - alpha_count - bare_count @@ -310,7 +310,7 @@ def analyze_wrong_purpose(series, col_name, field_type): def analyze_placeholders(series, col_name): """Detect suspiciously uniform or placeholder data.""" - findings = [] + findings: list[dict] = [] str_vals = series.dropna().astype(str).str.strip() non_null = str_vals[str_vals != ''] if len(non_null) == 0: @@ -346,7 +346,7 @@ def analyze_placeholders(series, col_name): def analyze_phantom_duplicates(df, sheet_name, field_types=None): """Detect records that are the same after normalizing whitespace/case/punctuation.""" - findings = [] + findings: list[dict] = [] if df.empty or len(df) < 2: return findings @@ 
-388,9 +388,9 @@ def analyze_phantom_duplicates(df, sheet_name, field_types=None): if dup_sigs.empty: return findings - groups = defaultdict(list) - for idx, sig in dup_sigs.items(): - groups[sig].append(idx) + groups: dict[str, list[int]] = defaultdict(list) + for idx, sig in zip(dup_sigs.index, dup_sigs.values): + groups[sig].append(int(idx)) for sig, indices in groups.items(): if len(indices) < 2: @@ -470,7 +470,7 @@ def analyze_fuzzy_duplicates( Finds matches that normalize-and-hash misses: token reordering, abbreviations, and typos. """ - findings = [] + findings: list[dict] = [] if df.empty or len(df) < 2: return findings @@ -521,14 +521,14 @@ def analyze_fuzzy_duplicates( continue fp_matched.update(indices) - row_nums = [i + 2 for i in indices] + row_nums = [int(i) + 2 for i in indices] sample_rows = [] for i in indices[:3]: sample_rows.append( {col: str(df.iloc[i][col]) for col in df.columns[:6]}, ) - differences = {} + differences: dict = {} for col in content_cols: vals = [str(df.iloc[i][col]) for i in indices] unique_vals = list(dict.fromkeys(vals)) diff --git a/data_hygiene_auditor/reporting/excel.py b/data_hygiene_auditor/reporting/excel.py index 612e2fd..0e9ed6a 100644 --- a/data_hygiene_auditor/reporting/excel.py +++ b/data_hygiene_auditor/reporting/excel.py @@ -6,7 +6,7 @@ from openpyxl.styles import Alignment, Border, Font, PatternFill, Side -def generate_excel(results, output_path): +def generate_excel(results: dict, output_path: str) -> str: """Generate sortable/filterable Excel findings file.""" wb = Workbook() ws = wb.active diff --git a/data_hygiene_auditor/reporting/html.py b/data_hygiene_auditor/reporting/html.py index e9391e5..dea1762 100644 --- a/data_hygiene_auditor/reporting/html.py +++ b/data_hygiene_auditor/reporting/html.py @@ -29,7 +29,7 @@ def _render_fix(fix): ) -def generate_html(results, output_path): +def generate_html(results: dict, output_path: str) -> str: """Generate a client-readable HTML report.""" counts = 
count_issues(results) total_issues = counts.get('total', 0) diff --git a/data_hygiene_auditor/reporting/pdf.py b/data_hygiene_auditor/reporting/pdf.py index f3c9e74..61877c5 100644 --- a/data_hygiene_auditor/reporting/pdf.py +++ b/data_hygiene_auditor/reporting/pdf.py @@ -23,7 +23,7 @@ def _p(val): return _xml_escape(str(val)) -def generate_pdf(results, output_path): +def generate_pdf(results: dict, output_path: str) -> str: """Generate a clean PDF report matching the HTML content.""" doc = SimpleDocTemplate( output_path, pagesize=letter, @@ -93,7 +93,7 @@ def generate_pdf(results, output_path): story.append(Spacer(1, 8)) total_issues = 0 - severity_totals = Counter() + severity_totals: Counter[str] = Counter() for sheet in results['sheets'].values(): for field in sheet['fields'].values(): for issue in field['issues']: diff --git a/data_hygiene_auditor/rules.py b/data_hygiene_auditor/rules.py index 1c80e04..b2665af 100644 --- a/data_hygiene_auditor/rules.py +++ b/data_hygiene_auditor/rules.py @@ -6,6 +6,8 @@ from pathlib import Path from typing import Any, Dict, List, Optional +import pandas as pd + VALID_CONDITIONS = { 'regex_match', 'not_regex_match', @@ -152,7 +154,7 @@ def _parse_rule(entry: Dict[str, Any], index: int) -> Rule: ) -def evaluate_rule(rule: Rule, series, col_name: str) -> Optional[Dict[str, Any]]: +def evaluate_rule(rule: Rule, series: pd.Series, col_name: str) -> Optional[Dict[str, Any]]: """Evaluate a single rule against a column. 
Returns a finding dict or None.""" if not rule.matches_column(col_name): return None diff --git a/data_hygiene_auditor/schema.py b/data_hygiene_auditor/schema.py index 6ff2892..f40e111 100644 --- a/data_hygiene_auditor/schema.py +++ b/data_hygiene_auditor/schema.py @@ -20,7 +20,7 @@ def load_schema(path): with open(path) as f: raw = json.load(f) - schema = {'columns': {}, 'sheets': {}} + schema: dict = {'columns': {}, 'sheets': {}} for col, spec in raw.get('columns', {}).items(): schema['columns'][col] = _normalize_spec(spec) @@ -65,7 +65,7 @@ def generate_schema(results): def validate_schema(sheet_data, schema, sheet_name): """Validate a sheet against a schema. Returns list of violation dicts.""" - findings = [] + findings: list[dict] = [] col_specs = dict(schema.get('columns', {})) sheet_spec = schema.get('sheets', {}).get(sheet_name, {}) diff --git a/data_hygiene_auditor/suggestions.py b/data_hygiene_auditor/suggestions.py index 294e9fb..1eaf04d 100644 --- a/data_hygiene_auditor/suggestions.py +++ b/data_hygiene_auditor/suggestions.py @@ -12,7 +12,8 @@ def generate_fix(issue_type: str, detail: dict, col_name: str = '', """ handler = _HANDLERS.get(issue_type) if handler: - return handler(detail, col_name, field_type) + result: dict | None = handler(detail, col_name, field_type) + return result return None diff --git a/data_hygiene_auditor/trend.py b/data_hygiene_auditor/trend.py index f6cc0df..04ad203 100644 --- a/data_hygiene_auditor/trend.py +++ b/data_hygiene_auditor/trend.py @@ -78,7 +78,7 @@ def compute_trend(current, baseline): def _count_issues(results): """Count total and per-severity issues across all sheets.""" - counts = Counter() + counts: Counter[str] = Counter() for sheet_data in results.get('sheets', {}).values(): counts += _count_sheet_issues(sheet_data) return counts @@ -86,7 +86,7 @@ def _count_issues(results): def _count_sheet_issues(sheet_data): """Count issues in a single sheet.""" - counts = Counter() + counts: Counter[str] = Counter() for 
field_data in sheet_data.get('fields', {}).values(): for issue in field_data.get('issues', []): counts['total'] += 1 diff --git a/pyproject.toml b/pyproject.toml index c647a90..31b7f3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,19 +5,26 @@ build-backend = "setuptools.build_meta" [project] name = "data-hygiene-auditor" version = "1.0.0" -description = "Detect data quality issues in Excel and CSV files — mixed formats, misused fields, placeholder floods, and phantom duplicates" +description = "A linter for your data — detect mixed formats, misused fields, placeholder floods, and phantom duplicates in Excel and CSV files" readme = "README.md" license = {text = "MIT"} requires-python = ">=3.9" authors = [ {name = "Lailara LLC"}, ] -keywords = ["data-quality", "excel", "csv", "audit", "data-hygiene", "data-profiling"] +keywords = ["data-quality", "data-linter", "excel", "csv", "audit", "data-hygiene", "data-profiling", "data-validation"] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Typing :: Typed", "Topic :: Scientific/Engineering :: Information Analysis", ] dependencies = [ @@ -27,7 +34,7 @@ dependencies = [ ] [project.optional-dependencies] -dev = ["pytest>=8.0", "ruff>=0.4"] +dev = ["pytest>=8.0", "ruff>=0.4", "mypy>=1.10", "pandas-stubs>=2.0"] [tool.setuptools.packages.find] include = ["data_hygiene_auditor*"] @@ -41,6 +48,24 @@ data-hygiene-audit = "data_hygiene_auditor.cli:main" [tool.pytest.ini_options] testpaths = ["tests"] +[tool.mypy] +warn_unused_configs = true +ignore_missing_imports = true +check_untyped_defs = true + +[[tool.mypy.overrides]] +module = 
"data_hygiene_auditor.api" +disallow_untyped_defs = true +warn_return_any = true + +[[tool.mypy.overrides]] +module = "data_hygiene_auditor.rules" +disallow_untyped_defs = true + +[[tool.mypy.overrides]] +module = ["data_hygiene_auditor.reporting.*", "data_hygiene_auditor.cli"] +check_untyped_defs = false + [tool.ruff] target-version = "py39" line-length = 120 @@ -52,3 +77,5 @@ select = ["E", "F", "W", "I"] [project.urls] Homepage = "https://github.com/MsShawnP/Data-Hygiene-Auditor" Repository = "https://github.com/MsShawnP/Data-Hygiene-Auditor" +Issues = "https://github.com/MsShawnP/Data-Hygiene-Auditor/issues" +Changelog = "https://github.com/MsShawnP/Data-Hygiene-Auditor/blob/main/CHANGELOG.md"