Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions content_refiner_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#!/usr/bin/env python3
"""
Simple Content Refiner - Processes existing text files without GUI
"""

import os
import sys
import time
import json
from pathlib import Path
from datetime import datetime
import requests

class SimpleContentRefiner:
    """Analyze local text files with an LLM and write refined JSON summaries.

    Scans the working directory tree for text files, sends each file's
    content to the OpenRouter chat-completions API for structured analysis,
    and writes one ``*_refined.json`` record per input plus a plain-text
    summary report into ``content_refiner_output/``. Works in degraded mode
    (no AI analysis) when no API key is configured.
    """

    def __init__(self):
        # API key is optional: without it files are still catalogued,
        # but call_llm() short-circuits and analysis comes back as None.
        self.api_key = os.getenv("OPENROUTER_API_KEY")
        self.output_dir = Path("content_refiner_output")
        self.output_dir.mkdir(exist_ok=True)

        # OpenRouter API endpoint and model selection
        self.api_url = "https://openrouter.ai/api/v1/chat/completions"
        self.model = "google/gemini-2.5-pro"

        # File processing settings
        self.max_file_size = 50000  # 50KB max per file
        self.processed_files = []   # str paths of successfully refined files

    def call_llm(self, prompt, max_tokens=1500):
        """Call the OpenRouter API and return the response text.

        Returns None when no API key is configured or the request fails,
        so callers can degrade gracefully instead of crashing.
        """
        if not self.api_key:
            print("Warning: No OpenRouter API key found. Skipping AI analysis.")
            return None

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/content-refiner",
            "X-Title": "Content Refiner"
        }

        data = {
            "model": self.model,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "max_tokens": max_tokens,
            "temperature": 0.3
        }

        try:
            # Fix: a timeout keeps one stuck request from hanging the whole run.
            response = requests.post(self.api_url, headers=headers, json=data,
                                     timeout=60)
            response.raise_for_status()
            result = response.json()
            return result['choices'][0]['message']['content']
        except Exception as e:
            # Best-effort by design: report and let the caller continue.
            print(f"API Error: {str(e)}")
            return None

    def analyze_content(self, content, filename):
        """Ask the LLM for a structured analysis of *content*.

        Returns the parsed JSON dict, a ``{"raw_analysis": ...}`` fallback
        when the response is not valid JSON, or None when the API call
        failed or was skipped.
        """
        # Fix: the prompt previously hard-coded a placeholder and ignored
        # the filename parameter entirely.
        prompt = f"""Analyze this text content from file: {filename}

Content:
{content[:45000]}

Provide a structured analysis with:

1. SUMMARY: A concise 2-3 sentence summary
2. KEYWORDS: List 10 important keywords/phrases
3. CATEGORY: Classify into one category (technology, business, education, health, etc.)
4. THEMES: List 3-5 main themes
5. QUALITY_SCORE: Rate content quality 1-10
6. REFINEMENT_SUGGESTIONS: 3 specific suggestions to improve the content

Format as JSON."""

        analysis = self.call_llm(prompt)
        if analysis is None:
            return None
        # Models frequently wrap JSON in ``` fences; strip them before parsing.
        cleaned = analysis.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.startswith("json"):
                cleaned = cleaned[4:]
        try:
            # json is already imported at module level; the narrow except
            # replaces the original bare `except:`.
            return json.loads(cleaned)
        except ValueError:  # ValueError covers json.JSONDecodeError
            return {"raw_analysis": analysis}

    def process_file(self, file_path):
        """Analyze one text file and write ``<stem>_refined.json`` to output.

        Never raises: any per-file failure is reported and swallowed so a
        single bad file cannot abort the whole batch.
        """
        try:
            print(f"Processing: {file_path}")

            # Read tolerantly: undecodable bytes are dropped, not fatal.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            if not content.strip():
                print(f"Empty file skipped: {file_path}")
                return

            # Truncate oversized files to keep the API request bounded.
            if len(content) > self.max_file_size:
                content = content[:self.max_file_size] + "..."
                print(f"File truncated to {self.max_file_size} characters")

            # None when the API key is missing or the request failed.
            analysis = self.analyze_content(content, file_path.name)

            # Assemble the refined output record.
            refined_data = {
                "original_file": str(file_path),
                "processed_date": datetime.now().isoformat(),
                "file_size": len(content),
                "analysis": analysis,
                "content_preview": content[:500] + "..." if len(content) > 500 else content
            }

            # Save refined analysis next to the other outputs.
            output_file = self.output_dir / f"{file_path.stem}_refined.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(refined_data, f, indent=2, ensure_ascii=False)

            self.processed_files.append(str(file_path))
            print(f"✓ Refined: {file_path.name} -> {output_file.name}")

            # Fix: only rate-limit when an API call can actually have been made.
            if self.api_key:
                time.sleep(1)

        except Exception as e:
            print(f"Error processing {file_path}: {e}")

    def find_text_files(self, directory="."):
        """Recursively collect .txt, .md and .text files under *directory*."""
        base = Path(directory)
        text_files = []
        for ext in ["*.txt", "*.md", "*.text"]:
            # rglob(p) is equivalent to glob(f"**/{p}")
            text_files.extend(base.rglob(ext))
        return text_files

    def run(self):
        """Run the content refiner on existing files, then write a summary."""
        print("=" * 60)
        print("Simple Content Refiner Starting...")
        print("=" * 60)

        # Find text files
        text_files = self.find_text_files()
        print(f"Found {len(text_files)} text files to process")

        if not text_files:
            print("No text files found to process.")
            return

        # Process each file
        output_root = self.output_dir.resolve()
        for file_path in text_files:
            # Fix: never re-ingest our own outputs (the summary report is a
            # .txt inside output_dir and would otherwise be picked up on
            # the next run).
            if output_root in file_path.resolve().parents:
                continue
            if file_path.stat().st_size > 0:  # Skip empty files
                self.process_file(file_path)

        # Create summary report
        self.create_summary_report()

        print("\n" + "=" * 60)
        print("Content Refining Complete!")
        print(f"Processed {len(self.processed_files)} files")
        print(f"Output saved to: {self.output_dir.absolute()}")
        print("=" * 60)

    def create_summary_report(self):
        """Write a plain-text report listing every processed file."""
        summary_file = self.output_dir / "refinement_summary.txt"

        with open(summary_file, 'w', encoding='utf-8') as f:
            f.write("Content Refinement Summary Report\n")
            f.write("=" * 50 + "\n\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"Total files processed: {len(self.processed_files)}\n\n")

            f.write("Processed Files:\n")
            f.write("-" * 20 + "\n")
            for file_path in self.processed_files:
                f.write(f"• {file_path}\n")

            f.write(f"\nRefined data saved in: {self.output_dir.absolute()}\n")

        print(f"Summary report created: {summary_file}")

if __name__ == "__main__":
    # Entry point: build the refiner and process everything in one pass.
    SimpleContentRefiner().run()
22 changes: 7 additions & 15 deletions text_organizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,30 +280,22 @@ def start_monitoring(self):
observer.join()

def main():
    """Organize text files in the current directory, then watch for new ones.

    Processes every existing txt file once, then blocks monitoring the
    folder until interrupted with Ctrl+C.
    """
    # Defect: the rendered diff left both the removed interactive-prompt
    # lines and the added default-directory lines in place, so the function
    # assigned source_folder twice. This is the coherent post-change version:
    # use the current directory as the default, no prompt.
    source_folder = "."
    source_path = Path(source_folder)

    if not source_path.exists():
        print(f"Error: Folder '{source_folder}' does not exist.")
        return

    print(f"Text File Organizer starting...")
    print(f"Source folder: {source_path.absolute()}")

    organizer = TextOrganizer(source_path)

    # Process existing files first
    organizer.process_existing_files()

    print("\nInitial processing complete!")
    print("Now monitoring for new txt files...")
    print("Press Ctrl+C to stop monitoring.")

    # Start monitoring for new files (blocks until interrupted)
    organizer.start_monitoring()

Expand Down