diff --git a/nlp-service/requirements.txt b/nlp-service/requirements.txt index 5246e18..7ad41db 100644 --- a/nlp-service/requirements.txt +++ b/nlp-service/requirements.txt @@ -1,30 +1,30 @@ # Kompl v2 nlp-service — commit 4. Pins per docs/research/2026-04-08-conversion-deps.md # and docs/research/2026-04-09-llm-compile.md. -fastapi==0.136.1 -uvicorn[standard]==0.47.0 +fastapi==0.137.0 +uvicorn[standard]==0.49.0 pydantic==2.13.4 httpx==0.28.1 -markitdown[all]==0.1.5 +markitdown[all]==0.1.6 # Promoted to a direct dep so routers/conversion.py can call # youtube-transcript-api WITHOUT going through MarkItDown for YouTube URLs. # Reason: MarkItDown's YouTubeConverter silently falls back to scraping the # watch-page HTML when no transcript exists (returns ~800 chars of footer # chrome — verified live on session 4a00f339). We need the transcript-vs-no- # transcript signal explicitly, which only the direct API exposes. -# markitdown[all]==0.1.5 constrains ~=1.0.0; Dockerfile upgrades to 1.2.x +# markitdown[all]==0.1.6 constrains ~=1.0.0; Dockerfile upgrades to 1.2.x # after pip install (1.0.x breaks on current YouTube timedtext responses). youtube-transcript-api~=1.0.0 # Commit 4: LLM compile. Use google-genai NOT google-generativeai (deprecated 2025-Q1). -google-genai==2.4.0 +google-genai==2.8.0 # Commit 4: async token-bucket rate limiter. Single uvicorn worker only — # InMemoryBucket is process-local; see research artifact section 3. -pyrate-limiter==3.7.0 +pyrate-limiter==3.9.0 # Part 2a: multi-layer NLP extraction pipeline. spacy==3.8.14 rake-nltk==1.0.6 yake==0.7.3 keybert==0.9.0 -sentence-transformers==5.5.0 +sentence-transformers==5.5.1 # Dockerfile pins torch==2.6.0 (CPU wheel). torch 2.5.1 breaks transformers # 5.x imports; torch >=2.7 breaks sentence-transformers meta-device init # (NotImplementedError on .to(device)). @@ -33,7 +33,7 @@ sentence-transformers==5.5.0 transformers==5.9.0 # pytextrank: spaCy-native TextRank component (PyPI-available, no git dep). pytextrank==3.3.0 -scikit-learn==1.8.0 +scikit-learn==1.9.0 # nltk: required by rake-nltk (stopwords + punkt tokenizer data). nltk==3.9.4 # Part 2b: fast Levenshtein/Jaro-Winkler matching for entity resolution. @@ -48,5 +48,5 @@ chromadb==0.4.24 # NumPy 2.0 removed. Pin numpy<2 until chromadb is upgraded. numpy<2 # Testing -pytest==9.0.3 +pytest==9.1.0 pytest-mock==3.15.1