diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..7ec4ee6
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,32 @@
+name: Node.js CI Pipeline
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read  # read-only token: the job only checks out and tests the code
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        node-version: ['20.x', '22.x']
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Use Node.js ${{ matrix.node-version }}
+      uses: actions/setup-node@v4
+      with:
+        node-version: ${{ matrix.node-version }}
+        cache: 'npm'
+
+    - name: Clean Install and Test
+      run: |
+        npm ci
+        npm test
+        npm run docs:check
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 84545d8..8f86baa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ Thumbs.db
 .env.local
 copilot-chat-history.json
 *.traineddata
+.analytics_cache.json
 
 # =========================
 # Bot Specific: Data & Media
diff --git a/.husky/pre-commit b/.husky/pre-commit
new file mode 100755
index 0000000..86f640a
--- /dev/null
+++ b/.husky/pre-commit
@@ -0,0 +1,6 @@
+#!/usr/bin/env sh
+# Regenerate docs and stage the output (README.md carries GENERATED sections too - confirm docs:generate rewrites it), then gate on tests and docs:check.
+npm run docs:generate || exit 1
+git add docs/ README.md || exit 1
+npm test || exit 1
+npm run docs:check || exit 1
diff --git a/README.md b/README.md
index b4a1862..e4124dc 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,9 @@ The current Node ingestion pipeline only analyzes text-oriented files.
 | `.csv` | Ingested by the active Node pipeline |
 | `.log` | Ingested by the active Node pipeline |
 | `.pdf` | Ingested by the active Node pipeline |
+| `.png` | Ingested by the active Node pipeline |
+| `.jpg` | Ingested by the active Node pipeline |
+| `.jpeg` | Ingested by the active Node pipeline |
 <!-- GENERATED:supported-file-types:END -->
 
 ## Repository Layout
@@ -87,7 +90,7 @@ The current Node ingestion pipeline only analyzes text-oriented files.
 <!-- GENERATED:repo-layout:START -->
 - `src/index.js` — Node CLI entry point.
 - `src/pipeline.js` — Pipeline coordinator that assembles all analytics tiers.
-- `src/ingestion/file-ingestion.js` — Read-only recursive file ingestion for supported text files.
+- `src/ingestion/file-ingestion.js` — Read-only recursive file ingestion for supported files.
 - `src/analytics/` — Descriptive, diagnostic, predictive, and prescriptive analytics modules.
 - `test/pipeline.test.js` — Node test coverage for core pipeline behavior.
 - `docs/architecture.md` — Hand-authored architecture overview for current and planned system design.
@@ -125,6 +128,99 @@ The bot must never modify, move, or delete ingested source files. Ingestion is r
 - When adding analytics, classify behavior under one of the four analytics tiers.
 - Update [docs/architecture.md](docs/architecture.md) when implementation changes affect current-vs-planned system boundaries.
 
+
+<br>
+
+
+
+## ⚙️ Installation & Setup
+
+**Prerequisites:** Ensure you have [Node.js](https://nodejs.org/) installed (version 18, 20, or 22+ recommended).
+
+1. **Clone the repository:**
+```bash
+git clone https://github.com/aj1126/uap_analyticsbot.git
+cd uap_analyticsbot
+
+```
+
+
+2. **Install dependencies:**
+This project installs as a standard Node.js CLI package, so there are no extra native build steps required for the current worker-thread ingestion flow. Simply run:
+```bash
+npm install
+
+```
+
+
+3. **Verify the installation:**
+Run the local test suite to ensure the multithreaded worker pool and caching engine are functioning correctly on your machine:
+```bash
+npm test
+
+```
+
+
+*(If all tests pass green, you are ready to start analyzing documents!)*
+
+
+---
+
+<br>
+
+
+
+
+## Usage
+
+
+To run the AnalyticsBot, pass the directory containing the files you want to analyze as an argument (if no directory is given, the current working directory is used):
+
+```bash
+node src/index.js ./my_folder/
+
+```
+
+By default, this will parse the documents and output a formatted JSON report directly to your console.
+
+### 👀 Watch Mode
+
+Keep the pipeline running in the background. It automatically re-runs the analysis and regenerates the report whenever you add, edit, or delete a file in the target directory:
+
+```bash
+node src/index.js ./my_folder/ --watch
+
+```
+
+### 🖨️ Report Generation
+
+Instead of dumping JSON directly to the console, you can generate formatted report files that are automatically saved to a `data_exports/` directory inside your current working directory:
+
+```bash
+node src/index.js ./my_folder/ --format=md
+
+```
+
+*(Supports `md` for Markdown or `csv` for spreadsheet datasets).*
+
+
+---
+<br>
+
+### 🚀 Advanced Usage
+
+The v1.2.0 AnalyticsBot engine supports multithreading and memoization caching. You can control these via CLI arguments:
+
+* `node src/index.js ./my_folder --workers=4` : Manually set the number of Node.js worker threads (defaults to max CPU cores).
+* `node src/index.js ./my_folder --clear-cache` : Bypasses the `.analytics_cache.json` file and forces a fresh read of all documents.
+* `node src/index.js ./my_folder --format=csv` : Exports the final report as a spreadsheet-compatible `.csv` file.
+
+<br>
+<br>
+<br>
+
+
+
 ## 🚀 Planned Technical Optimizations
 
 ### 1. Performance & Infrastructure
diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md
index d8f5898..4f88bf4 100644
--- a/docs/USER_GUIDE.md
+++ b/docs/USER_GUIDE.md
@@ -56,14 +56,16 @@ npm start -- "C:\Path\To\Folder" > analytics_report.json
 
 ## Supported File Types
 
-Currently, the ingestion engine natively parses the following text-based extensions:
+Currently, the ingestion engine natively parses the following extensions:
 * `.txt`
 * `.md`
 * `.json`
 * `.csv`
 * `.log`
-
-*(Note: Binary and multimedia extraction, such as PDF parsing and Image OCR, are tracked for a future development stage).*
+* `.pdf`
+* `.png`
+* `.jpg`
+* `.jpeg`
 
 ## Testing & Validation
 
diff --git a/docs/architecture.md b/docs/architecture.md
index c79a4eb..2346525 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -4,26 +4,31 @@
 
 The repository currently ships a Node.js CLI-centered analytics flow:
 
-1. **CLI Orchestrator (`src/index.js`)** resolves the source directory and writes the final report to stdout.
-2. **Read-Only Ingestion (`src/ingestion/file-ingestion.js`)** recursively scans supported text files, streams file content, and extracts words, dates, locations, and filesystem metadata.
+1. **CLI Orchestrator (`src/index.js`)** resolves the source directory, supports watch mode, and routes report output to stdout or export files.
+2. **Read-Only Ingestion (`src/ingestion/file-ingestion.js`)** recursively scans supported text files, dispatches parsing work to Node.js worker threads, memoizes compatible results in `.analytics_cache.json`, and extracts words, dates, locations, and filesystem metadata.
 3. **Analytics Pipeline (`src/pipeline.js`)** builds the descriptive, diagnostic, predictive, and prescriptive tiers from the ingested file set.
-4. **Output Layer** returns a single structured JSON report for the requested directory.
+4. **Output Layer** returns structured JSON or saves Markdown / CSV exports for the requested directory.
+
+### v1.2.0 Pipeline Architecture
+* **Ingestion (Multithreaded):** Utilizes Node.js `worker_threads` and file-stat fingerprinting (`.analytics_cache.json`) to bypass redundant processing and drastically speed up execution.
+* **Semantic Analytics:** Employs a TF-IDF weighting engine to filter generic stop-words and a Cosine Similarity math engine to automatically cluster related UAP documents based on vector distance.
 
 ## Current Runtime Boundaries
 
 Implemented today:
 
 - recursive read-only ingestion for `.txt`, `.md`, `.json`, `.csv`, and `.log`
+- multithreaded parsing with fingerprint-based cache reuse for compatible ingestions
 - tokenization plus lightweight date/location extraction
 - descriptive, diagnostic, predictive, and prescriptive analytics modules
-- JSON report delivery through the Node CLI
+- JSON, Markdown, and CSV report delivery through the Node CLI
+- directory watch mode that re-runs the pipeline after file changes
 
 Not yet implemented in the active system:
 
 - binary or multimedia extraction
 - Named Entity Recognition (NER)
-- dashboard or alternate export formats
-- background scheduling or directory watching
+- dashboard or background scheduling
 
 ## Planned Expansion
 
diff --git a/docs/docs-source.json b/docs/docs-source.json
index 568d598..2f8acb3 100644
--- a/docs/docs-source.json
+++ b/docs/docs-source.json
@@ -26,7 +26,7 @@
       "description": "Auto-generate CHANGELOG.md, bump the semantic version, and create a Git release tag based on conventional commit history."
     }
   ],
-  "supportedFileTypes": [".txt", ".md", ".json", ".csv", ".log", ".pdf"],
+  "supportedFileTypes": [".txt", ".md", ".json", ".csv", ".log", ".pdf", ".png", ".jpg", ".jpeg"],
   "repoLayout": [
     {
       "path": "src/index.js",
@@ -38,7 +38,7 @@
     },
     {
       "path": "src/ingestion/file-ingestion.js",
-      "description": "Read-only recursive file ingestion for supported text files."
+      "description": "Read-only recursive file ingestion for supported files."
     },
     {
       "path": "src/analytics/",
diff --git a/package-lock.json b/package-lock.json
index d1e5ba5..49baa7c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -16,7 +16,8 @@
         "tesseract.js": "^7.0.0"
       },
       "devDependencies": {
-        "commit-and-tag-version": "^12.7.3"
+        "commit-and-tag-version": "^12.7.3",
+        "husky": "^9.1.7"
       }
     },
     "node_modules/@babel/code-frame": {
@@ -1299,6 +1300,22 @@
         "node": ">=10"
       }
     },
+    "node_modules/husky": {
+      "version": "9.1.7",
+      "resolved": "https://registry.npmjs.org/husky/-/husky-9.1.7.tgz",
+      "integrity": "sha512-5gs5ytaNjBrh5Ow3zrvdUUY+0VxIuWVL4i9irt6friV+BqdCfmV11CQTWMiBYWHbXhco+J1kHfTOUkePhCDvMA==",
+      "dev": true,
+      "license": "MIT",
+      "bin": {
+        "husky": "bin.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/typicode"
+      }
+    },
     "node_modules/idb-keyval": {
       "version": "6.2.5",
       "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.5.tgz",
diff --git a/package.json b/package.json
index 7cc9abd..b8c1b2c 100644
--- a/package.json
+++ b/package.json
@@ -6,9 +6,10 @@
   "main": "src/index.js",
   "scripts": {
     "start": "node src/index.js",
-    "test": "node --test",
+    "test": "node --test --experimental-test-coverage",
     "docs:generate": "node scripts/generate-docs.js",
     "docs:check": "node scripts/generate-docs.js --check && node scripts/validate-docs.js",
+    "prepare": "husky",
     "release": "commit-and-tag-version",
     "postrelease": "git push --follow-tags && gh release create v%npm_package_version% --notes-file CHANGELOG.md --title \"Release v%npm_package_version%\""
   },
@@ -29,6 +30,7 @@
     "tesseract.js": "^7.0.0"
   },
   "devDependencies": {
-    "commit-and-tag-version": "^12.7.3"
+    "commit-and-tag-version": "^12.7.3",
+    "husky": "^9.1.7"
   }
 }
diff --git a/src/analytics/descriptive.js b/src/analytics/descriptive.js
index 111739e..60ed4ff 100644
--- a/src/analytics/descriptive.js
+++ b/src/analytics/descriptive.js
@@ -1,34 +1,43 @@
-function countBy(items) {
-    return items.reduce((counts, item) => {
-        counts[item] = (counts[item] ?? 0) + 1;
-        return counts;
-    }, {});
-}
-
 function sortEntriesDescending(record) {
     return Object.entries(record).sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]));
 }
 
 function buildDescriptiveAnalytics(files) {
-    const allWords = files.flatMap((file) => file.words);
-    const allDates = files.flatMap((file) => file.dates);
-    const allLocations = files.flatMap((file) => file.locations);
+    const allDates = files.flatMap((file) => file.dates || []);
+    const allLocations = files.flatMap((file) => file.locations || []);
+
+    const globalWordFrequency = {};
+    const glossarySet = new Set();
 
-    const wordFrequency = countBy(allWords);
+    // Iterate through files using the new memory-efficient object format
+    files.forEach((file) => {
+        if (file.wordFrequency) {
+            for (const [word, count] of Object.entries(file.wordFrequency)) {
+                globalWordFrequency[word] = (globalWordFrequency[word] || 0) + count;
+                glossarySet.add(word);
+            }
+        } else if (file.words) { 
+            // Backwards compatibility layer
+            for (const word of file.words) {
+                globalWordFrequency[word] = (globalWordFrequency[word] || 0) + 1;
+                glossarySet.add(word);
+            }
+        }
+    });
 
     return {
         fileCount: files.length,
-        glossary: [...new Set(allWords)].sort(),
-        wordFrequency,
-        topWords: sortEntriesDescending(wordFrequency).slice(0, 10).map(([word, count]) => ({ word, count })),
+        glossary: [...glossarySet].sort(),
+        wordFrequency: globalWordFrequency,
+        topWords: sortEntriesDescending(globalWordFrequency).slice(0, 10).map(([word, count]) => ({ word, count })),
         dates: [...new Set(allDates)].sort(),
         locations: [...new Set(allLocations)].sort(),
         files: files.map((file) => ({
             path: file.relativePath,
-            extension: file.extension, // <-- FIX: Added extension propagation
+            extension: file.extension, 
             size: file.size,
             modifiedAt: file.modifiedAt,
-            wordCount: file.words.length,
+            wordCount: file.totalWords || (file.words ? file.words.length : 0),
             dates: file.dates,
             locations: file.locations,
             metadata: file.metadata || {} 
diff --git a/src/analytics/diagnostic.js b/src/analytics/diagnostic.js
index acbbaf6..1d63749 100644
--- a/src/analytics/diagnostic.js
+++ b/src/analytics/diagnostic.js
@@ -1,28 +1,22 @@
 function incrementNestedCount(target, firstKey, secondKey, amount = 1) {
-    if (!target[firstKey]) {
-        target[firstKey] = {};
-    }
-
+    if (!target[firstKey]) target[firstKey] = {};
     target[firstKey][secondKey] = (target[firstKey][secondKey] ?? 0) + amount;
 }
 
 function buildUsageRates(files, groupSelector) {
     const groupedCounts = {};
-
     for (const file of files) {
         const groups = groupSelector(file);
-        if (groups.length === 0 || file.words.length === 0) {
-            continue;
-        }
+        const uniqueWords = file.uniqueWords || (file.words ? [...new Set(file.words)] : []);
+        
+        if (!groups || groups.length === 0 || uniqueWords.length === 0) continue;
 
-        const uniqueWords = new Set(file.words);
         for (const group of groups) {
             for (const word of uniqueWords) {
                 incrementNestedCount(groupedCounts, group, word);
             }
         }
     }
-
     return Object.fromEntries(
         Object.entries(groupedCounts).map(([group, counts]) => {
             const total = Object.values(counts).reduce((sum, count) => sum + count, 0) || 1;
@@ -30,19 +24,102 @@ function buildUsageRates(files, groupSelector) {
                 .map(([word, count]) => ({ word, usageRate: Number((count / total).toFixed(4)) }))
                 .sort((left, right) => right.usageRate - left.usageRate || left.word.localeCompare(right.word))
                 .slice(0, 5);
-
             return [group, topWords];
         })
     );
 }
 
+// Cosine similarity of two sparse { word: weight } vectors; 0 when either has zero magnitude.
+function calculateCosineSimilarity(vecA, vecB) {
+    const hasOwn = Object.prototype.hasOwnProperty;
+    let dotProduct = 0, normA = 0, normB = 0;
+    for (const word in vecA) {
+        // Own keys only: a plain-object vecB would resolve words such as "constructor" to an
+        // inherited function, and multiplying by it turns the whole score into NaN.
+        if (hasOwn.call(vecB, word)) dotProduct += (vecA[word] || 0) * (vecB[word] || 0);
+        normA += Math.pow(vecA[word], 2);
+    }
+    for (const word in vecB) {
+        normB += Math.pow(vecB[word], 2);
+    }
+    if (normA === 0 || normB === 0) return 0;
+    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
+}
+
+// Builds per-file TF-IDF keyword rankings and links each file to its most similar peers via cosine similarity.
+// Returns one { fileName, topKeywords, relatedDocuments } entry per input file, in input order.
+function calculateTFIDF(files) {
+    const fileCount = files.length;
+    // Null-prototype map: ingested words such as "constructor" must not collide with Object.prototype members.
+    const documentFrequencies = Object.create(null);
+    files.forEach(file => {
+        const unique = file.uniqueWords || (file.words ? [...new Set(file.words)] : []);
+        unique.forEach(word => { documentFrequencies[word] = (documentFrequencies[word] || 0) + 1; });
+    });
+
+    // Pass 1: build one sparse { word: weight } vector per file and rank its top five keywords
+    const vectorizedFiles = files.map(file => {
+        let tf = file.wordFrequency || Object.create(null);
+        let totalWords = file.totalWords || 1;
+
+        // Backwards compatibility layer for un-cleared caches
+        if (!file.wordFrequency && file.words) {
+            file.words.forEach(word => { tf[word] = (tf[word] || 0) + 1; });
+            totalWords = file.words.length || 1;
+        }
+
+        const vector = Object.create(null);
+        const tfidf = Object.keys(tf).map(word => {
+            const termFrequency = tf[word] / totalWords;
+            const inverseDocumentFrequency = Math.log(fileCount / (1 + documentFrequencies[word])); // "1 +" smoothing: terms found in (almost) every file get a zero or negative weight
+            const weight = termFrequency * inverseDocumentFrequency;
+            vector[word] = weight;
+            return { word, weight };
+        }).sort((a, b) => b.weight - a.weight);
+
+        return { ...file, topKeywords: tfidf.slice(0, 5).map(t => t.word), vector };
+    });
+
+    // Pass 2: semantic cross-linking. The pairwise comparison is quadratic, so only the first MAX_CROSS_REF files are compared.
+    const MAX_CROSS_REF = 500;
+    const targetFiles = vectorizedFiles.slice(0, MAX_CROSS_REF);
+    const relatedByIndex = Array.from({ length: vectorizedFiles.length }, () => []);
+
+    const getFileLabel = (file) => file.fileName || file.relativePath || 'unknown';
+    for (let indexA = 0; indexA < targetFiles.length; indexA += 1) {
+        for (let indexB = indexA + 1; indexB < targetFiles.length; indexB += 1) {
+            const fileA = targetFiles[indexA];
+            const fileB = targetFiles[indexB];
+            const score = calculateCosineSimilarity(fileA.vector, fileB.vector);
+            if (score > 0.05) {
+                const correlationScore = Number(score.toFixed(4));
+                relatedByIndex[indexA].push({ match: getFileLabel(fileB), correlationScore });
+                relatedByIndex[indexB].push({ match: getFileLabel(fileA), correlationScore });
+            }
+        }
+    }
+
+    return vectorizedFiles.map((file, index) => {
+        const related = relatedByIndex[index]
+            ? relatedByIndex[index].sort((left, right) => right.correlationScore - left.correlationScore).slice(0, 3)
+            : [];
+
+        return {
+            fileName: getFileLabel(file),
+            topKeywords: file.topKeywords,
+            relatedDocuments: related
+        };
+    });
+}
+
 function buildDiagnosticAnalytics(files) {
+    const tfIdfAnalysis = calculateTFIDF(files);
+    
     return {
-        wordUsageByDate: buildUsageRates(files, (file) => file.dates),
-        wordUsageByLocation: buildUsageRates(files, (file) => file.locations)
+        wordUsageByDate: buildUsageRates(files, (file) => file.dates || []),
+        wordUsageByLocation: buildUsageRates(files, (file) => file.locations || []),
+        semanticAnalysis: tfIdfAnalysis
     };
 }
 
-module.exports = {
-    buildDiagnosticAnalytics
-};
+module.exports = { buildDiagnosticAnalytics };
\ No newline at end of file
diff --git a/src/analytics/predictive.js b/src/analytics/predictive.js
index 2634874..492e76b 100644
--- a/src/analytics/predictive.js
+++ b/src/analytics/predictive.js
@@ -1,30 +1,25 @@
-function monthKey(dateString) {
-    return dateString.slice(0, 7);
-}
-
-function average(values) {
-    if (values.length === 0) {
-        return 0;
-    }
-
-    return values.reduce((sum, value) => sum + value, 0) / values.length;
-}
+function monthKey(dateString) { return dateString.slice(0, 7); }
 
+// ✨ Nonlinear Forecasting Tweaks (Weighted Moving Average)
 function forecastNextValue(series) {
-    if (series.length === 0) {
-        return 0;
-    }
-
-    if (series.length === 1) {
-        return series[0].count;
-    }
+    if (series.length === 0) return 0;
+    if (series.length === 1) return series[0].count;
 
     const deltas = [];
     for (let index = 1; index < series.length; index += 1) {
         deltas.push(series[index].count - series[index - 1].count);
     }
 
-    return Math.max(0, Math.round(series[series.length - 1].count + average(deltas)));
+    let weightedSum = 0;
+    let weightTotal = 0;
+    for (let i = 0; i < deltas.length; i++) {
+        const weight = i + 1; // More recent intervals gain higher weight
+        weightedSum += deltas[i] * weight;
+        weightTotal += weight;
+    }
+
+    const wma = weightTotal === 0 ? 0 : weightedSum / weightTotal;
+    return Math.max(0, Math.round(series[series.length - 1].count + wma));
 }
 
 function addMonth(month) {
@@ -33,33 +28,50 @@ function addMonth(month) {
     return nextDate.toISOString().slice(0, 7);
 }
 
+// Expands the sorted month keys into a gap-free monthly series; months missing from `timeline` get a count of 0.
+function fillEmptyIntervals(orderedMonths, timeline) {
+    if (orderedMonths.length === 0) return [];
+    const filledSeries = [];
+    let currentMonth = orderedMonths[0];
+    const lastMonth = orderedMonths[orderedMonths.length - 1];
+    // Plain string comparison: assumes zero-padded 'YYYY-MM' keys so lexicographic order equals date order - TODO confirm for modifiedAt-derived keys.
+    while (currentMonth <= lastMonth) {
+        filledSeries.push({
+            month: currentMonth,
+            count: timeline[currentMonth] ? timeline[currentMonth].totalWords : 0
+        });
+        currentMonth = addMonth(currentMonth);
+    }
+    return filledSeries;
+}
+
 function buildKeywordSeries(files) {
     const timeline = {};
-
     for (const file of files) {
-        const key = monthKey(file.modifiedAt);
-        if (!timeline[key]) {
-            timeline[key] = { totalWords: 0, locations: {} };
-        }
+        const documentDate = (file.dates || []).find((value) => /^[0-9]{4}-[0-9]{2}(?:-[0-9]{2})?$/.test(value)) || file.modifiedAt;
+        if (!documentDate) continue;
+
+        const key = monthKey(documentDate);
+        if (!timeline[key]) timeline[key] = { totalWords: 0, locations: {} };
 
-        timeline[key].totalWords += file.words.length;
-        for (const location of file.locations) {
+        timeline[key].totalWords += file.totalWords || (file.words || []).length;
+        for (const location of (file.locations || [])) {
             timeline[key].locations[location] = (timeline[key].locations[location] ?? 0) + 1;
         }
     }
-
     return timeline;
 }
 
 function buildPredictiveAnalytics(files) {
     const timeline = buildKeywordSeries(files);
     const orderedMonths = Object.keys(timeline).sort();
-    const keywordSeries = orderedMonths.map((month) => ({ month, count: timeline[month].totalWords }));
+    
+    const keywordSeries = fillEmptyIntervals(orderedMonths, timeline);
     const nextMonth = orderedMonths.length > 0 ? addMonth(orderedMonths[orderedMonths.length - 1]) : new Date().toISOString().slice(0, 7);
 
     const locationTotals = {};
     for (const month of orderedMonths) {
-        for (const [location, count] of Object.entries(timeline[month].locations)) {
+        for (const [location, count] of Object.entries(timeline[month]?.locations || {})) {
             locationTotals[location] = (locationTotals[location] ?? 0) + count;
         }
     }
@@ -68,18 +80,9 @@ function buildPredictiveAnalytics(files) {
         .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))[0]?.[0] ?? null;
 
     return {
-        keywordFrequencyForecast: {
-            basis: keywordSeries,
-            forecastMonth: nextMonth,
-            forecastWordCount: forecastNextValue(keywordSeries)
-        },
-        locationClusterForecast: {
-            basis: locationTotals,
-            likelyNextHotspot: topLocation
-        }
+        keywordFrequencyForecast: { basis: keywordSeries, forecastMonth: nextMonth, forecastWordCount: forecastNextValue(keywordSeries) },
+        locationClusterForecast: { basis: locationTotals, likelyNextHotspot: topLocation }
     };
 }
 
-module.exports = {
-    buildPredictiveAnalytics
-};
+module.exports = { buildPredictiveAnalytics };
\ No newline at end of file
diff --git a/src/analytics/prescriptive.js b/src/analytics/prescriptive.js
index d9ca602..c1fcde2 100644
--- a/src/analytics/prescriptive.js
+++ b/src/analytics/prescriptive.js
@@ -1,7 +1,8 @@
 function buildPrescriptiveAnalytics(files, descriptiveAnalytics) {
     const missingMetadataFiles = files
-        .filter((file) => file.dates.length === 0 || file.locations.length === 0)
-        .map((file) => file.relativePath);
+        .filter((file) => !file.dates?.length || !file.locations?.length)
+        // FIX: The worker pool returns `fileName`, so we map that instead (with a fallback)
+        .map((file) => file.fileName || file.relativePath);
 
     const recommendations = [];
 
@@ -13,7 +14,7 @@ function buildPrescriptiveAnalytics(files, descriptiveAnalytics) {
         });
     }
 
-    if (descriptiveAnalytics.locations.length > 1) {
+    if (descriptiveAnalytics.locations && descriptiveAnalytics.locations.length > 1) {
         recommendations.push({
             type: 'folder-restructure',
             message: 'Consider grouping files into location-based subfolders to improve topic clustering and navigation.',
@@ -35,4 +36,4 @@ function buildPrescriptiveAnalytics(files, descriptiveAnalytics) {
 
 module.exports = {
     buildPrescriptiveAnalytics
-};
+};
\ No newline at end of file
diff --git a/src/delivery/csv-generator.js b/src/delivery/csv-generator.js
new file mode 100644
index 0000000..45e328c
--- /dev/null
+++ b/src/delivery/csv-generator.js
@@ -0,0 +1,36 @@
+const fs = require('node:fs/promises');
+const path = require('node:path');
+// Serialises one value as a double-quoted CSV cell: null/undefined become '', embedded quotes are doubled, and values starting (after optional whitespace) with = + - or @ get a leading apostrophe - presumably a spreadsheet formula-injection guard.
+function escapeCsvCell(value) {
+    const stringValue = String(value ?? '');
+    const sanitizedValue = /^\s*[=+\-@]/.test(stringValue) ? `'${stringValue}` : stringValue;
+    return `"${sanitizedValue.replace(/"/g, '""')}"`;
+}
+// Escapes every cell, joins them with commas and terminates the row with a newline.
+function buildCsvRow(...cells) {
+    return `${cells.map(escapeCsvCell).join(',')}\n`;
+}
+// Writes a Category/Metric/Value CSV summary of `report` to exportsDir/report-<timestamp>.csv (creating the directory if needed) and resolves with that path.
+async function generateCsvReport(report, exportsDir) {
+    await fs.mkdir(exportsDir, { recursive: true });
+    const csvPath = path.join(exportsDir, `report-${Date.now()}.csv`);
+
+    let csvContent = buildCsvRow('Category', 'Metric', 'Value');
+    // Optional chaining, as for the predictive tier below: a report without `descriptive` yields empty cells instead of a TypeError.
+    csvContent += buildCsvRow('Descriptive', 'FileCount', report.descriptive?.fileCount);
+    const locations = report.descriptive?.locations || report.locations || [];
+    csvContent += buildCsvRow('Descriptive', 'UniqueLocations', locations.join(', '));
+
+    if (report.predictive?.locationClusterForecast) {
+        csvContent += buildCsvRow('Predictive', 'LikelyNextHotspot', report.predictive.locationClusterForecast.likelyNextHotspot);
+    }
+    if (report.predictive?.keywordFrequencyForecast) {
+        csvContent += buildCsvRow('Predictive', 'ForecastMonth', report.predictive.keywordFrequencyForecast.forecastMonth);
+        csvContent += buildCsvRow('Predictive', 'ForecastWordCount', report.predictive.keywordFrequencyForecast.forecastWordCount);
+    }
+
+    await fs.writeFile(csvPath, csvContent, 'utf-8');
+    return csvPath;
+}
+
+module.exports = { generateCsvReport };
\ No newline at end of file
diff --git a/src/index.js b/src/index.js
index 97b52f8..031fc69 100644
--- a/src/index.js
+++ b/src/index.js
@@ -3,14 +3,19 @@ const path = require('node:path');
 const chokidar = require('chokidar');
 const { generateAnalyticsReport } = require('./pipeline');
 const { generateMarkdownReport } = require('./delivery/markdown-generator');
+const { generateCsvReport } = require('./delivery/csv-generator');
 
-async function runPipeline(sourceDirectory, format) {
+async function runPipeline(sourceDirectory, format, options) {
     try {
-        const report = await generateAnalyticsReport(sourceDirectory);
+        const report = await generateAnalyticsReport(sourceDirectory, options);
+        const exportsDir = path.join(process.cwd(), 'data_exports');
+        
         if (format === 'md' || format === 'markdown') {
-            const exportsDir = path.join(process.cwd(), 'data_exports');
             const savedPath = await generateMarkdownReport(report, exportsDir);
             process.stdout.write(`✅ Markdown report successfully generated at:\n${savedPath}\n`);
+        } else if (format === 'csv') {
+            const savedPath = await generateCsvReport(report, exportsDir);
+            process.stdout.write(`✅ CSV report successfully generated at:\n${savedPath}\n`);
         } else {
             process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
         }
@@ -22,42 +27,48 @@ async function runPipeline(sourceDirectory, format) {
 async function main() {
     const args = process.argv.slice(2);
     
-    // Parse flags
     const formatFlag = args.find(arg => arg.startsWith('--format='));
     const format = formatFlag ? formatFlag.split('=')[1].toLowerCase() : 'json';
     const isWatchMode = args.includes('--watch');
+    const clearCache = args.includes('--clear-cache');
+    
+    const workersFlag = args.find(arg => arg.startsWith('--workers='));
+    let workers;
+    if (workersFlag) {
+        const parsed = parseInt(workersFlag.split('=')[1], 10);
+        if (Number.isNaN(parsed) || parsed < 1) {
+            process.stderr.write(`⚠️ Invalid --workers value. Must be a positive integer. Defaulting to CPU count.\n`);
+        } else {
+            workers = parsed;
+        }
+    }
     
-    // Parse target directory
     const sourceArg = args.find(arg => !arg.startsWith('--'));
     const sourceDirectory = sourceArg ? path.resolve(sourceArg) : process.cwd();
 
+    const options = { clearCache, workers };
+
     if (isWatchMode) {
         process.stdout.write(`👀 Watching directory for changes: ${sourceDirectory}\n`);
-        
-        // Initialize OS Event Listener
         const watcher = chokidar.watch(sourceDirectory, {
-            ignored: [/(^|[\/\\])\../, /node_modules/, /data_exports/],
-            persistent: true,
-            ignoreInitial: false
+            ignored: [/(^|[\/\\])\../, /node_modules/, /[\/\\]data_exports([\/\\]|$)/],
+            persistent: true, ignoreInitial: false
         });
 
-        // Debounce logic to prevent CPU spikes on bulk file operations
         let timeout;
+        let pipelineQueue = Promise.resolve();
         const triggerPipeline = () => {
             clearTimeout(timeout);
             timeout = setTimeout(() => {
                 process.stdout.write(`\n🔄 File system event detected. Recalculating analytics...\n`);
-                runPipeline(sourceDirectory, format);
-            }, 500); // 500ms buffer
+                pipelineQueue = pipelineQueue
+                    .then(() => runPipeline(sourceDirectory, format, options))
+                    .catch(() => {});
+            }, 500);
         };
-
-        // Bind events
-        watcher
-            .on('add', triggerPipeline)
-            .on('change', triggerPipeline)
-            .on('unlink', triggerPipeline);
+        watcher.on('add', triggerPipeline).on('change', triggerPipeline).on('unlink', triggerPipeline);
     } else {
-        await runPipeline(sourceDirectory, format);
+        await runPipeline(sourceDirectory, format, options);
     }
 }
 
diff --git a/src/ingestion/file-ingestion.js b/src/ingestion/file-ingestion.js
index afa1617..3d1ecb9 100644
--- a/src/ingestion/file-ingestion.js
+++ b/src/ingestion/file-ingestion.js
@@ -3,12 +3,33 @@ const os = require("node:os");
 const { promises: fsp } = require("node:fs");
 const { Worker } = require("node:worker_threads");
 
+const CACHE_SCHEMA_VERSION = 1;
+
+// Extracts the entries map from a serialized cache document. A payload whose shape or
+// version does not match CACHE_SCHEMA_VERSION yields {}; malformed JSON makes JSON.parse throw.
+function parseCacheEntries(cacheData) {
+    const isObject = (value) => Boolean(value) && typeof value === 'object';
+    const payload = JSON.parse(cacheData);
+
+    if (!isObject(payload) || payload.version !== CACHE_SCHEMA_VERSION) {
+        return {};
+    }
+
+    const { entries } = payload;
+    if (!isObject(entries) || Array.isArray(entries)) {
+        return {};
+    }
+    return entries;
+}
+
 async function* walkFiles(rootDirectory) {
     const directoryEntries = await fsp.readdir(rootDirectory, { withFileTypes: true });
 
     for (const entry of directoryEntries) {
         const absolutePath = path.join(rootDirectory, entry.name);
-        if (entry.isDirectory()) {
+        if (entry.isSymbolicLink()) {
+            continue; // Skip symlinks to prevent traversal outside the source directory
+        } else if (entry.isDirectory()) {
             yield* walkFiles(absolutePath);
         } else if (entry.isFile()) {
             yield absolutePath;
@@ -16,70 +37,98 @@ async function* walkFiles(rootDirectory) {
     }
 }
 
-async function ingestDirectory(rootDirectory) {
+async function ingestDirectory(rootDirectory, options = {}) {
     const sourceDirectory = path.resolve(rootDirectory);
     const files = [];
     const pathsToProcess = [];
 
+    // State Caching (Memoization)
+    const cachePath = path.join(process.cwd(), '.analytics_cache.json');
+    let cache = {};
+    if (!options.clearCache) {
+        try {
+            const cacheData = await fsp.readFile(cachePath, 'utf-8');
+            cache = parseCacheEntries(cacheData);
+        } catch (err) {
+            cache = {};
+        }
+    }
+
+    const visitedPaths = new Set();
     for await (const filePath of walkFiles(sourceDirectory)) {
-        pathsToProcess.push(filePath);
+        if (filePath === cachePath) continue; // never ingest (and re-cache) our own cache file
+        visitedPaths.add(filePath);
+        const stats = await fsp.stat(filePath);
+        const fingerprint = `${stats.size}-${stats.mtimeMs}`; // Size + Modified Time
+        if (cache[filePath] && cache[filePath].fingerprint === fingerprint) {
+            files.push(cache[filePath].data); // Cache hit: reuse the stored result, skip the worker
+        } else {
+            pathsToProcess.push({ filePath, fingerprint });
+        }
+    }
+
+    // Evict stale cache keys scoped to this sourceDirectory
+    for (const key of Object.keys(cache)) {
+        if (
+            (key === sourceDirectory || key.startsWith(sourceDirectory + path.sep)) &&
+            !visitedPaths.has(key)
+        ) {
+            delete cache[key];
+        }
     }
 
-    // FIX 1: Cap workers to the number of files. 
-    // Prevents spawning 15 massive threads to process 1 tiny test file.
-    const maxCores = Math.max(1, os.cpus().length - 1);
+    const maxCores = options.workers || Math.max(1, os.cpus().length - 1);
     const numWorkers = Math.min(pathsToProcess.length, maxCores);
     
-    if (numWorkers === 0) {
-        return { sourceDirectory, files };
-    }
+    if (numWorkers > 0) {
+        process.stdout.write(`\n🚀 Initializing WebAssembly Worker Pool (${numWorkers} threads)...\n`);
 
-    process.stdout.write(`\n🚀 Initializing WebAssembly Worker Pool (${numWorkers} threads)...\n`);
+        let currentIndex = 0;
 
-    let currentIndex = 0;
+        await Promise.all(
+            Array.from({ length: numWorkers }).map(() => {
+                return new Promise((resolve) => {
+                    const worker = new Worker(path.join(__dirname, "worker.js"));
 
-    await Promise.all(
-        Array.from({ length: numWorkers }).map(() => {
-            return new Promise((resolve) => {
-                const worker = new Worker(path.join(__dirname, "worker.js"));
+                    worker.on("message", (msg) => {
+                        if (msg.success && msg.result) {
+                            files.push(msg.result);
+                            cache[msg.filePath] = { fingerprint: msg.fingerprint, data: msg.result };
+                        } else if (!msg.success) {
+                            process.stderr.write(`\n⚠️ File failed (${msg.filePath}): ${msg.error}\n`);
+                        }
+                        assignNextTask();
+                    });
 
-                worker.on("message", (msg) => {
-                    if (msg.success && msg.result) {
-                        files.push(msg.result);
-                    } else if (!msg.success) {
-                        process.stderr.write(`\n⚠️ File failed (${msg.filePath}): ${msg.error}\n`);
+                    worker.on("error", (err) => {
+                        process.stderr.write(`\n⚠️ Fatal Worker Crash: ${err.message}\n`);
+                        worker.terminate().then(resolve);
+                    });
+
+                    function assignNextTask() {
+                        if (currentIndex >= pathsToProcess.length) {
+                            worker.terminate().then(resolve);
+                            return;
+                        }
+                        const task = pathsToProcess[currentIndex++];
+                        worker.postMessage({ filePath: task.filePath, fingerprint: task.fingerprint, rootDirectory: sourceDirectory });
                     }
-                    assignNextTask();
-                });
 
-                worker.on("error", (err) => {
-                    process.stderr.write(`\n⚠️ Fatal Worker Crash: ${err.message}\n`);
-                    // FIX 2: Await thread termination so it doesn't leave dangling memory leaks
-                    worker.terminate().then(resolve);
+                    assignNextTask();
                 });
+            })
+        );
+    }
 
-                function assignNextTask() {
-                    if (currentIndex >= pathsToProcess.length) {
-                        // FIX 2: Await thread termination to clear the Node.js event loop
-                        worker.terminate().then(resolve);
-                        return;
-                    }
-                    
-                    const filePath = pathsToProcess[currentIndex++];
-                    worker.postMessage({ filePath, rootDirectory: sourceDirectory });
-                }
-
-                assignNextTask();
-            });
-        })
+    // Save newly parsed data back to .analytics_cache.json
+    const tempCachePath = `${cachePath}.${process.pid}.${Date.now()}.tmp`;
+    await fsp.writeFile(
+        tempCachePath,
+        JSON.stringify({ version: CACHE_SCHEMA_VERSION, entries: cache }, null, 2)
     );
+    await fsp.rename(tempCachePath, cachePath);
 
-    return {
-        sourceDirectory,
-        files,
-    };
+    return { sourceDirectory, files };
 }
 
-module.exports = {
-    ingestDirectory,
-};
\ No newline at end of file
+module.exports = { ingestDirectory };
\ No newline at end of file
diff --git a/src/ingestion/worker.js b/src/ingestion/worker.js
index 88f4c49..d333a2c 100644
--- a/src/ingestion/worker.js
+++ b/src/ingestion/worker.js
@@ -1,124 +1,133 @@
-const { parentPort } = require("node:worker_threads");
-const fs = require("node:fs");
-const path = require("node:path");
-const readline = require("node:readline");
-const { promises: fsp } = require("node:fs");
-const nlp = require("compromise");
+const path = require('node:path');
+const { parentPort } = require('node:worker_threads');
+const fs = require('node:fs');
+const fsp = require('node:fs/promises');
+const readline = require('node:readline');
+const nlp = require('compromise');
 
-// Protect the background V8 isolate from abrupt asynchronous library crashes
-process.on("unhandledRejection", (reason) => {
-    parentPort.postMessage({ success: false, error: reason?.message || String(reason) });
-});
-
-const TEXT_EXTENSIONS = new Set([".txt", ".md", ".json", ".csv", ".log"]);
-const IMAGE_EXTENSIONS = new Set([".png", ".jpg", ".jpeg"]);
-const SUPPORTED_EXTENSIONS = new Set([...TEXT_EXTENSIONS, ...IMAGE_EXTENSIONS, ".pdf"]);
+const TEXT_EXTENSIONS = new Set(['.txt', '.md', '.json', '.csv', '.log']);
+const IMAGE_EXTENSIONS = new Set(['.png', '.jpg', '.jpeg']);
+const SUPPORTED_EXTENSIONS = new Set([...TEXT_EXTENSIONS, ...IMAGE_EXTENSIONS, '.pdf']);
 
+// ✨ Advanced Stop-Word Culling Dictionary
 const STOP_WORDS = new Set([
-    "the", "of", "to", "and", "in", "a", "for", "on", "that", "is", "it", 
-    "with", "as", "was", "at", "by", "be", "this", "an", "are", "from", 
-    "or", "which", "will", "not", "have", "has", "but", "they", "their", 
-    "we", "you", "i", "he", "she", "my", "his", "her", "its", "our", "your",
-    "there", "can", "if", "would", "about", "who", "what", "where", "when", "how"
+    'a', 'about', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
+    'how', 'i', 'in', 'is', 'it', 'of', 'on', 'or', 'that', 'the', 'this',
+    'to', 'was', 'what', 'when', 'where', 'who', 'will', 'with'
 ]);
 
-function normalizeWords(text) {
-    const rawWords = text.toLowerCase().match(/[a-z0-9']+/g) ?? [];
-    return rawWords.filter(word => !STOP_WORDS.has(word) && isNaN(word) && word.length > 1);
-}
+parentPort.on('message', async (task) => {
+    try {
+        const extension = path.extname(task.filePath).toLowerCase();
+        if (!SUPPORTED_EXTENSIONS.has(extension)) {
+            parentPort.postMessage({
+                success: true,
+                filePath: task.filePath,
+                fingerprint: task.fingerprint
+            });
+            return;
+        }
 
-function extractDates(text) {
-    const doc = nlp(text);
-    return [...new Set(doc.match("#Date").out("array"))];
-}
+        const stats = await fsp.stat(task.filePath);
+        const dates = new Set();
+        const locations = new Set();
+        // Null-prototype map: tokens such as "constructor" or "__proto__" must not resolve to Object.prototype members.
+        const wordFrequency = Object.create(null);
+        let totalWords = 0;
 
-function extractLocations(text) {
-    const doc = nlp(text);
-    const knownPlaces = doc.match("#Place").out("array");
-    const contextualPlaces = doc.match("(in|at|near|location) #ProperNoun").not("(in|at|near|location)").out("array");
-    return [...new Set([...knownPlaces, ...contextualPlaces])];
-}
+        const processTextChunk = (text) => {
+            if (!text) return;
 
-async function processTextData(text, words, dates, locations) {
-    if (!text) return;
-    words.push(...normalizeWords(text));
-    extractDates(text).forEach(date => dates.add(date));
-    extractLocations(text).forEach(loc => locations.add(loc));
-}
+            const rawWords = text
+                .replace(/[^\w\s]/g, '')
+                .toLowerCase()
+                .split(/\s+/)
+                .filter(word => word.length > 1 && !STOP_WORDS.has(word) && !/^\d+$/.test(word));
 
-async function readFileData(filePath, rootDirectory) {
-    const extension = path.extname(filePath).toLowerCase();
-    if (!SUPPORTED_EXTENSIONS.has(extension)) return null;
+            // 🚀 OPTIMIZATION: Calculate map inside worker to drastically reduce IPC channel memory usage
+            for (const word of rawWords) {
+                wordFrequency[word] = (wordFrequency[word] || 0) + 1;
+            }
+            totalWords += rawWords.length;
 
-    const stats = await fsp.stat(filePath);
-    const words = [];
-    const dates = new Set();
-    const locations = new Set();
-    let metadata = {}; 
+            const doc = nlp(text);
+            for (const value of doc.match('#Date').out('array')) {
+                dates.add(value);
+            }
+            for (const value of doc.match('#Place').out('array')) {
+                locations.add(value);
+            }
 
-    if (TEXT_EXTENSIONS.has(extension)) {
-        const stream = fs.createReadStream(filePath, { encoding: "utf8" });
-        const lineReader = readline.createInterface({ input: stream, crlfDelay: Infinity });
-        for await (const line of lineReader) await processTextData(line, words, dates, locations);
-        stream.destroy();
-    } else if (extension === ".pdf") {
-        const dataBuffer = await fsp.readFile(filePath);
-        let extractedText = "";
+            for (const match of text.matchAll(/Date:\s*([0-9]{4}-[0-9]{2}-[0-9]{2})/gi)) {
+                dates.add(match[1]);
+            }
+            for (const match of text.matchAll(/Location:\s*([A-Za-z][A-Za-z \t'-]*)/gi)) { // space/tab only: a name must not run across line breaks
+                locations.add(match[1].trim());
+            }
+        };
 
-        try {
-            const pdfParse = require("pdf-parse"); 
-            const parseFn = typeof pdfParse === "function" ? pdfParse : pdfParse.default;
-            const pdfData = await parseFn(dataBuffer);
-            extractedText = pdfData.text || "";
-            metadata = pdfData.info || {};
-        } catch (err) { /* OCR Fallback fallback loop logic flags */ }
+        const processTextFile = async () => {
+            const fileStream = fs.createReadStream(task.filePath, { encoding: 'utf-8' });
+            const lines = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
 
-        if (extractedText.trim().length < 50) {
-            const tail = dataBuffer.toString("utf8", Math.max(0, dataBuffer.length - 1024));
-            
-            if (tail.includes("%%EOF") || tail.includes("startxref")) {
-                try {
-                    const mupdf = await import("mupdf");
-                    const tesseract = require("tesseract.js");
-                    
-                    const doc = mupdf.Document.openDocument(dataBuffer, "application/pdf");
-                    let ocrText = ""; 
-                    for (let i = 0; i < doc.countPages(); i++) {
-                        const page = doc.loadPage(i);
-                        const pixmap = page.toPixmap(mupdf.Matrix.scale(2, 2), mupdf.ColorSpace.DeviceRGB, false);
-                        const { data: { text } } = await tesseract.recognize(Buffer.from(pixmap.asPNG()), "eng", { logger: () => {} });
-                        ocrText += text + " ";
-                    }
-                    if (ocrText.trim().length > 0) extractedText = ocrText;
-                } catch (ocrError) { /* Fail safely over to parsed text metadata arrays */ }
+            try {
+                for await (const line of lines) {
+                    processTextChunk(line);
+                }
+            } finally {
+                lines.close();
+                fileStream.destroy();
             }
-        }
-        await processTextData(extractedText, words, dates, locations);
-    } else if (IMAGE_EXTENSIONS.has(extension)) {
-        const tesseract = require("tesseract.js");
-        const { data: { text } } = await tesseract.recognize(filePath, "eng", { logger: () => {} });
-        await processTextData(text, words, dates, locations);
-    }
+        };
 
-    return {
-        path: filePath,
-        relativePath: path.relative(rootDirectory, filePath),
-        extension,
-        size: stats.size,
-        createdAt: stats.birthtime.toISOString(),
-        modifiedAt: stats.mtime.toISOString(),
-        words,
-        dates: [...dates],
-        locations: [...locations],
-        metadata, 
-    };
-}
+        const processPdfFile = async () => {
+            try {
+                const pdfParse = require('pdf-parse');
+                const parseFn = typeof pdfParse === 'function' ? pdfParse : pdfParse.default;
+                const dataBuffer = await fsp.readFile(task.filePath);
+                const pdfResult = await parseFn(dataBuffer);
+                processTextChunk(pdfResult?.text || '');
+            } catch (error) {
+                process.stderr.write(`\n⚠️ PDF extraction skipped (${task.filePath}): ${error.message}\n`);
+            }
+        };
 
-parentPort.on("message", async ({ filePath, rootDirectory }) => {
-    try {
-        const result = await readFileData(filePath, rootDirectory);
-        parentPort.postMessage({ success: true, result });
+        const processImageFile = async () => {
+            try {
+                const tesseract = require('tesseract.js');
+                const result = await tesseract.recognize(task.filePath, 'eng', { logger: () => {} });
+                processTextChunk(result?.data?.text || '');
+            } catch (error) {
+                process.stderr.write(`\n⚠️ Image OCR skipped (${task.filePath}): ${error.message}\n`);
+            }
+        };
+
+        if (TEXT_EXTENSIONS.has(extension)) {
+            await processTextFile();
+        } else if (extension === '.pdf') {
+            await processPdfFile();
+        } else if (IMAGE_EXTENSIONS.has(extension)) {
+            await processImageFile();
+        }
+
+        parentPort.postMessage({
+            success: true,
+            filePath: task.filePath,
+            fingerprint: task.fingerprint,
+            result: {
+                fileName: task.filePath.split(/[/\\]/).pop(),
+                relativePath: task.rootDirectory ? path.relative(task.rootDirectory, task.filePath) : task.filePath,
+                extension,
+                size: stats.size,
+                modifiedAt: stats.mtime.toISOString(),
+                wordFrequency, 
+                totalWords,
+                uniqueWords: Object.keys(wordFrequency),
+                dates: [...dates],
+                locations: [...locations]
+            }
+        });
     } catch (error) {
-        parentPort.postMessage({ success: false, error: error.message, filePath });
+        parentPort.postMessage({ success: false, filePath: task.filePath, error: error.message });
     }
 });
\ No newline at end of file
diff --git a/src/pipeline.js b/src/pipeline.js
index 36b06db..837481c 100644
--- a/src/pipeline.js
+++ b/src/pipeline.js
@@ -4,8 +4,8 @@ const { buildDiagnosticAnalytics } = require('./analytics/diagnostic');
 const { buildPredictiveAnalytics } = require('./analytics/predictive');
 const { buildPrescriptiveAnalytics } = require('./analytics/prescriptive');
 
-async function generateAnalyticsReport(sourceDirectory) {
-    const ingestionResult = await ingestDirectory(sourceDirectory);
+async function generateAnalyticsReport(sourceDirectory, options = {}) {
+    const ingestionResult = await ingestDirectory(sourceDirectory, options);
     const descriptive = buildDescriptiveAnalytics(ingestionResult.files);
 
     return {
@@ -17,6 +17,4 @@ async function generateAnalyticsReport(sourceDirectory) {
     };
 }
 
-module.exports = {
-    generateAnalyticsReport
-};
+module.exports = { generateAnalyticsReport };
\ No newline at end of file
diff --git a/test/ingestion-regressions.test.js b/test/ingestion-regressions.test.js
new file mode 100644
index 0000000..3496e18
--- /dev/null
+++ b/test/ingestion-regressions.test.js
@@ -0,0 +1,73 @@
+const test = require('node:test');
+const assert = require('node:assert/strict');
+const fs = require('node:fs/promises');
+const os = require('node:os');
+const path = require('node:path');
+
+const { ingestDirectory } = require('../src/ingestion/file-ingestion');
+const { generateAnalyticsReport } = require('../src/pipeline');
+
+test('watch mode ignores data_exports directory and descendants', async () => {
+    const indexSource = await fs.readFile(path.join(__dirname, '..', 'src', 'index.js'), 'utf-8');
+
+    assert.ok(indexSource.includes('/[\\/\\\\]data_exports([\\/\\\\]|$)/'));
+    assert.ok(!indexSource.includes('/data_exports[\\/\\\\]?$/'));
+});
+
+test('cache eviction does not remove sibling directory entries', async () => {
+    const cwdBefore = process.cwd();
+    const workspace = await fs.mkdtemp(path.join(os.tmpdir(), 'uap-cache-'));
+    const sourceDirectory = path.join(workspace, 'UAP_Data');
+    const siblingDirectory = path.join(workspace, 'UAP_Data_Archive');
+    const liveFile = path.join(sourceDirectory, 'live.txt');
+    const staleSourceFile = path.join(sourceDirectory, 'stale.txt');
+    const staleSiblingFile = path.join(siblingDirectory, 'stale.txt');
+
+    try {
+        await fs.mkdir(sourceDirectory, { recursive: true });
+        await fs.mkdir(siblingDirectory, { recursive: true });
+        await fs.writeFile(liveFile, 'Roswell event on 2024-01-01');
+
+        await fs.writeFile(
+            path.join(workspace, '.analytics_cache.json'),
+            JSON.stringify(
+                {
+                    version: 1,
+                    entries: {
+                        [staleSourceFile]: { fingerprint: 'old', data: { fileName: 'stale.txt' } },
+                        [staleSiblingFile]: { fingerprint: 'old', data: { fileName: 'stale.txt' } },
+                    },
+                },
+                null,
+                2
+            )
+        );
+
+        process.chdir(workspace);
+        await ingestDirectory(sourceDirectory, { workers: 1 });
+
+        const cache = JSON.parse(await fs.readFile(path.join(workspace, '.analytics_cache.json'), 'utf-8'));
+        assert.equal(cache.entries[staleSourceFile], undefined);
+        assert.ok(cache.entries[staleSiblingFile]);
+    } finally {
+        process.chdir(cwdBefore);
+        await fs.rm(workspace, { recursive: true, force: true });
+    }
+});
+
+test('worker NLP extraction captures natural-language dates and places', async () => {
+    const cwdBefore = process.cwd();
+    const fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'uap-nlp-'));
+    try {
+        process.chdir(fixtureRoot); // ingestion writes .analytics_cache.json into cwd; keep it inside the fixture
+        const notePath = path.join(fixtureRoot, 'observation.txt');
+        await fs.writeFile(notePath, 'Witnesses reported unusual movement on 2024-03-05 near Phoenix in Arizona.');
+
+        const report = await generateAnalyticsReport(fixtureRoot, { workers: 1, clearCache: true });
+        assert.ok(report.descriptive.dates.length > 0);
+        assert.ok(report.descriptive.locations.includes('Phoenix'));
+    } finally {
+        process.chdir(cwdBefore); // restore before removing the directory we were standing in
+        await fs.rm(fixtureRoot, { recursive: true, force: true });
+    }
+});
diff --git a/test/pipeline.test.js b/test/pipeline.test.js
index fb5e54f..6c06805 100644
--- a/test/pipeline.test.js
+++ b/test/pipeline.test.js
@@ -5,6 +5,8 @@ const os = require('node:os');
 const path = require('node:path');
 
 const { generateAnalyticsReport } = require('../src/pipeline');
+const { buildDiagnosticAnalytics } = require('../src/analytics/diagnostic');
+const { generateCsvReport } = require('../src/delivery/csv-generator');
 
 async function createFixtureDirectory() {
     const fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'uap-analytics-'));
@@ -65,24 +67,107 @@ test('generateAnalyticsReport flags files with missing metadata for prescriptive
     }
 });
 
-test('generateAnalyticsReport builds all analytics tiers from text files', async () => {
-    const fixtureRoot = await createFixtureDirectory();
+test('generateAnalyticsReport passes ingestion options through to the pipeline', async () => {
+    const ingestionModulePath = require.resolve('../src/ingestion/file-ingestion');
+    const pipelineModulePath = require.resolve('../src/pipeline');
+    const originalIngestionModule = require.cache[ingestionModulePath];
+    const originalPipelineModule = require.cache[pipelineModulePath];
+    let receivedOptions;
+
+    delete require.cache[pipelineModulePath];
+    require.cache[ingestionModulePath] = {
+        id: ingestionModulePath,
+        filename: ingestionModulePath,
+        loaded: true,
+        exports: {
+            ingestDirectory: async (_sourceDirectory, options) => {
+                receivedOptions = options;
+                return {
+                    sourceDirectory: '/tmp/mock-source',
+                    files: [
+                        {
+                            fileName: 'fixture.txt',
+                            locations: ['Roswell'],
+                            dates: ['2024-01-01'],
+                            wordFrequency: { sighting: 1 },
+                            totalWords: 1,
+                            uniqueWords: ['sighting']
+                        }
+                    ],
+                };
+            }
+        }
+    };
 
     try {
-        const report = await generateAnalyticsReport(fixtureRoot);
+        const { generateAnalyticsReport: generateMockedAnalyticsReport } = require('../src/pipeline');
+        const report = await generateMockedAnalyticsReport('/tmp/mock-source', { workers: 4, clearCache: true });
 
-        assert.equal(report.descriptive.fileCount, 2);
-        assert.deepEqual(report.descriptive.locations, ['Phoenix', 'Roswell']);
-        
-        // Use an OR condition to support both object paths during transition
-        const dates = report.descriptive.dates || report.dates;
-        assert.deepEqual(dates, ['2024-01-01', '2024-02-14']);
-        
-        assert.ok(report.descriptive.wordFrequency.location >= 2);
-        assert.ok(report.diagnostic.wordUsageByLocation.Roswell.length > 0);
-        assert.equal(report.predictive.locationClusterForecast.likelyNextHotspot, 'Phoenix');
-        assert.equal(report.prescriptive.recommendations[0].type, 'folder-restructure');
+        assert.equal(report.sourceDirectory, '/tmp/mock-source');
+        assert.equal(report.descriptive.fileCount, 1);
+        assert.deepEqual(receivedOptions, { workers: 4, clearCache: true });
     } finally {
-        await fs.rm(fixtureRoot, { recursive: true, force: true });
+        if (originalIngestionModule) {
+            require.cache[ingestionModulePath] = originalIngestionModule;
+        } else {
+            delete require.cache[ingestionModulePath];
+        }
+
+        if (originalPipelineModule) {
+            require.cache[pipelineModulePath] = originalPipelineModule;
+        } else {
+            delete require.cache[pipelineModulePath];
+        }
+    }
+});
+
+test('buildDiagnosticAnalytics falls back to relative paths when file names are missing', () => {
+    const diagnostic = buildDiagnosticAnalytics([
+        {
+            relativePath: 'reports/alpha.txt',
+            wordFrequency: { signal: 2, light: 1 },
+            totalWords: 3,
+            uniqueWords: ['signal', 'light']
+        },
+        {
+            relativePath: 'reports/beta.txt',
+            wordFrequency: { signal: 2, glow: 1 },
+            totalWords: 3,
+            uniqueWords: ['signal', 'glow']
+        }
+    ]);
+
+    assert.equal(diagnostic.semanticAnalysis[0].fileName, 'reports/alpha.txt');
+    assert.equal(diagnostic.semanticAnalysis[0].relatedDocuments[0].match, 'reports/beta.txt');
+});
+
+test('generateCsvReport escapes spreadsheet-sensitive values', async () => {
+    const exportsDir = await fs.mkdtemp(path.join(os.tmpdir(), 'uap-analytics-csv-'));
+
+    try {
+        const csvPath = await generateCsvReport(
+            {
+                descriptive: {
+                    fileCount: 1,
+                    locations: ['=cmd|" /C calc"!A0', 'Phoenix, AZ']
+                },
+                predictive: {
+                    locationClusterForecast: {
+                        likelyNextHotspot: '@hidden'
+                    },
+                    keywordFrequencyForecast: {
+                        forecastMonth: '2026-06',
+                        forecastWordCount: 3
+                    }
+                }
+            },
+            exportsDir
+        );
+
+        const csvContent = await fs.readFile(csvPath, 'utf-8');
+        assert.match(csvContent, /"'=cmd\|"" \/C calc""!A0, Phoenix, AZ"/);
+        assert.match(csvContent, /"'\@hidden"/);
+    } finally {
+        await fs.rm(exportsDir, { recursive: true, force: true });
     }
 });
\ No newline at end of file