Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions src/neurostack/ask.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import httpx

from .config import get_config
from .config import _auth_headers, get_config
from .search import hybrid_search

ASK_PROMPT = """You are a knowledge assistant answering questions \
Expand Down Expand Up @@ -68,23 +68,22 @@ def ask_vault(
sources_text = "\n\n---\n\n".join(source_blocks)
prompt = ASK_PROMPT.format(sources=sources_text, question=question)

# Call Ollama LLM
# Call LLM (OpenAI-compatible endpoint)
resp = httpx.post(
f"{llm_url}/api/generate",
f"{llm_url}/v1/chat/completions",
headers=_auth_headers(cfg.llm_api_key),
json={
"model": llm_model,
"prompt": prompt,
"messages": [{"role": "user", "content": prompt}],
"stream": False,
"options": {
"temperature": 0.3,
"num_predict": 500,
},
"think": False,
"reasoning_effort": "none",
"temperature": 0.3,
"max_tokens": 500,
},
timeout=180.0,
)
resp.raise_for_status()
answer = resp.json().get("response", "").strip()
answer = resp.json()["choices"][0]["message"]["content"].strip()

# Strip think tags if model includes them
answer = re.sub(r"<think>.*?</think>", "", answer, flags=re.DOTALL).strip()
Expand Down
31 changes: 29 additions & 2 deletions src/neurostack/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,12 @@ def _do_init(vault_root, cfg, profession_name=None, run_index=False):
f'embed_url = "{cfg.embed_url}"\n'
f'llm_url = "{cfg.llm_url}"\n'
f'llm_model = "{cfg.llm_model}"\n'
)
if cfg.llm_api_key:
config_text += f'llm_api_key = "{cfg.llm_api_key}"\n'
if cfg.embed_api_key:
config_text += f'embed_api_key = "{cfg.embed_api_key}"\n'
config_text += (
f'\n[writeback]\n'
f'enabled = {wb_enabled}\n'
f'path = "{cfg.writeback_path}"\n'
Expand Down Expand Up @@ -836,8 +842,9 @@ def cmd_init(args):
profession = _prompt("Profession pack", default="none", choices=prof_choices)

# 3. LLM configuration
print("\n \033[1mOllama Configuration\033[0m")
print(" NeuroStack uses Ollama for embeddings and summaries.\n")
print("\n \033[1mLLM Configuration\033[0m")
print(" NeuroStack works with any OpenAI-compatible endpoint")
print(" (Ollama, vLLM, Together AI, Groq, OpenRouter, etc.)\n")

embed_url = _prompt("Embedding endpoint", default=cfg.embed_url)
llm_url = _prompt("LLM endpoint", default=cfg.llm_url)
Expand All @@ -850,6 +857,22 @@ def cmd_init(args):
]
llm_model = _prompt("LLM model for summaries", default=cfg.llm_model, choices=model_choices)

# 3b. API keys (optional — only needed for cloud providers)
llm_api_key = ""
embed_api_key = ""
is_local = any(h in llm_url for h in ("localhost", "127.0.0.1", "0.0.0.0"))
if not is_local:
print("\n \033[1mAPI Authentication\033[0m")
print(" Cloud providers require an API key.\n")
llm_api_key = _prompt("LLM API key", default="")
if embed_url != llm_url:
embed_api_key = _prompt("Embedding API key", default="")
else:
embed_api_key = llm_api_key
elif _confirm("\n Configure API keys? (only needed for cloud providers)", default=False):
llm_api_key = _prompt("LLM API key", default="")
embed_api_key = _prompt("Embedding API key", default=llm_api_key)

# 4. Write-back
print(
"\n Enable memory write-back? Memories will be"
Expand All @@ -862,12 +885,14 @@ def cmd_init(args):

# Show summary
wb_label = "yes" if writeback else "no"
auth_label = "yes" if (llm_api_key or embed_api_key) else "no"
print("\n \033[1m━━━ Summary ━━━\033[0m\n")
print(f" Vault: {vault_root}")
print(f" Profession: {profession}")
print(f" Embed URL: {embed_url}")
print(f" LLM URL: {llm_url}")
print(f" LLM model: {llm_model}")
print(f" API auth: {auth_label}")
print(f" Write-back: {wb_label}")
print(f" Index now: {'yes' if run_index else 'no'}")

Expand All @@ -880,6 +905,8 @@ def cmd_init(args):
cfg.embed_url = embed_url
cfg.llm_url = llm_url
cfg.llm_model = llm_model
cfg.llm_api_key = llm_api_key
cfg.embed_api_key = embed_api_key
cfg.writeback_enabled = writeback

_do_init(
Expand Down
30 changes: 14 additions & 16 deletions src/neurostack/community.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@

log = logging.getLogger("neurostack")

from .config import get_config
from .config import _auth_headers, get_config

_cfg = get_config()
SUMMARIZE_URL = _cfg.llm_url
EMBED_URL = _cfg.embed_url
SUMMARIZE_MODEL = _cfg.llm_model
_LLM_HEADERS = _auth_headers(_cfg.llm_api_key)

COMMUNITY_PROMPT = (
"You are summarizing a cluster of thematically"
Expand Down Expand Up @@ -134,29 +135,26 @@ def _generate_community_summary(
note_summaries=notes_str or "(none)",
)

schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"summary": {"type": "string"},
},
"required": ["title", "summary"],
}

resp = httpx.post(
f"{base_url}/api/generate",
f"{base_url}/v1/chat/completions",
headers=_LLM_HEADERS,
json={
"model": model,
"prompt": prompt,
"messages": [{"role": "user", "content": prompt}],
"stream": False,
"format": schema,
"options": {"temperature": 0.3, "num_predict": 512},
"think": False,
"reasoning_effort": "none",
"temperature": 0.3,
"max_tokens": 512,
},
timeout=120.0,
)
resp.raise_for_status()
raw = resp.json().get("response", "").strip()
raw = resp.json()["choices"][0]["message"]["content"].strip()
# Strip markdown fences if present
if raw.startswith("```"):
import re
raw = re.sub(r"^```\w*\n?", "", raw)
raw = re.sub(r"\n?```$", "", raw).strip()

try:
parsed = json.loads(raw)
Expand Down
26 changes: 15 additions & 11 deletions src/neurostack/community_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@

log = logging.getLogger("neurostack")

from .config import get_config
from .config import _auth_headers, get_config

_cfg = get_config()
SUMMARIZE_URL = _cfg.llm_url
EMBED_URL = _cfg.embed_url
SUMMARIZE_MODEL = _cfg.llm_model
_LLM_HEADERS = _auth_headers(_cfg.llm_api_key)

_MAP_PROMPT = """You are analyzing a knowledge community summary to answer a question.

Expand Down Expand Up @@ -197,18 +198,20 @@ def global_query(
)
try:
resp = httpx.post(
f"{summarize_url}/api/generate",
f"{summarize_url}/v1/chat/completions",
headers=_LLM_HEADERS,
json={
"model": model,
"prompt": prompt,
"messages": [{"role": "user", "content": prompt}],
"stream": False,
"options": {"temperature": 0.1, "num_predict": 256},
"think": False,
"reasoning_effort": "none",
"temperature": 0.1,
"max_tokens": 256,
},
timeout=60.0,
)
resp.raise_for_status()
finding = resp.json().get("response", "").strip()
finding = resp.json()["choices"][0]["message"]["content"].strip()
if finding:
findings.append(f"[{hit['title']}]\n{finding}")
except Exception as e:
Expand All @@ -228,18 +231,19 @@ def global_query(
)
try:
resp = httpx.post(
f"{summarize_url}/api/generate",
f"{summarize_url}/v1/chat/completions",
json={
"model": model,
"prompt": reduce_prompt,
"messages": [{"role": "user", "content": reduce_prompt}],
"stream": False,
"options": {"temperature": 0.3, "num_predict": 1024},
"think": False,
"reasoning_effort": "none",
"temperature": 0.3,
"max_tokens": 1024,
},
timeout=120.0,
)
resp.raise_for_status()
answer = resp.json().get("response", "").strip()
answer = resp.json()["choices"][0]["message"]["content"].strip()
except Exception as e:
log.warning(f"Reduce step failed: {e}")
answer = "\n\n".join(findings)
Expand Down
13 changes: 12 additions & 1 deletion src/neurostack/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class Config:
# NOTE: Verify the license of any model you configure here.
# phi3.5 is MIT licensed.
llm_model: str = "phi3.5"
llm_api_key: str = ""
embed_api_key: str = ""
session_dir: Path = field(default_factory=lambda: Path.home() / ".claude" / "projects")
api_host: str = "127.0.0.1"
api_port: int = 8000
Expand Down Expand Up @@ -57,7 +59,7 @@ def load_config() -> Config:
if key in data:
setattr(cfg, key, Path(os.path.expanduser(data[key])))
for key in ("embed_url", "embed_model", "llm_url", "llm_model",
"api_host", "api_key"):
"llm_api_key", "embed_api_key", "api_host", "api_key"):
if key in data:
setattr(cfg, key, data[key])
if "embed_dim" in data:
Expand All @@ -81,6 +83,8 @@ def load_config() -> Config:
"NEUROSTACK_EMBED_DIM": ("embed_dim", int),
"NEUROSTACK_LLM_URL": ("llm_url", str),
"NEUROSTACK_LLM_MODEL": ("llm_model", str),
"NEUROSTACK_LLM_API_KEY": ("llm_api_key", str),
"NEUROSTACK_EMBED_API_KEY": ("embed_api_key", str),
"NEUROSTACK_SESSION_DIR": ("session_dir", Path),
"NEUROSTACK_API_HOST": ("api_host", str),
"NEUROSTACK_API_PORT": ("api_port", int),
Expand All @@ -102,6 +106,13 @@ def load_config() -> Config:
return cfg


def _auth_headers(api_key: str) -> dict[str, str]:
"""Build Authorization header dict if an API key is set."""
if api_key:
return {"Authorization": f"Bearer {api_key}"}
return {}


# Module-level singleton
_config: Config | None = None

Expand Down
17 changes: 10 additions & 7 deletions src/neurostack/embedder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024-2026 Raphael Southall
"""Ollama embedding client."""
"""Embedding client (OpenAI-compatible /v1/ endpoints)."""

import json
from typing import Optional
Expand All @@ -13,12 +13,13 @@
except ImportError:
HAS_NUMPY = False

from .config import get_config
from .config import _auth_headers, get_config

_cfg = get_config()
DEFAULT_EMBED_URL = _cfg.embed_url
EMBED_MODEL = _cfg.embed_model
EMBED_DIM = _cfg.embed_dim
_EMBED_HEADERS = _auth_headers(_cfg.embed_api_key)


def get_embedding(
Expand All @@ -33,13 +34,14 @@ def get_embedding(
"Install with: pip install neurostack[full]"
)
resp = httpx.post(
f"{base_url}/api/embed",
f"{base_url}/v1/embeddings",
headers=_EMBED_HEADERS,
json={"model": model, "input": text},
timeout=30.0,
)
resp.raise_for_status()
data = resp.json()
return np.array(data["embeddings"][0], dtype=np.float32)
return np.array(data["data"][0]["embedding"], dtype=np.float32)


def get_embeddings_batch(
Expand All @@ -58,14 +60,15 @@ def get_embeddings_batch(
for i in range(0, len(texts), batch_size):
batch = texts[i : i + batch_size]
resp = httpx.post(
f"{base_url}/api/embed",
f"{base_url}/v1/embeddings",
headers=_EMBED_HEADERS,
json={"model": model, "input": batch},
timeout=60.0,
)
resp.raise_for_status()
data = resp.json()
for emb in data["embeddings"]:
all_embeddings.append(np.array(emb, dtype=np.float32))
for item in data["data"]:
all_embeddings.append(np.array(item["embedding"], dtype=np.float32))
return all_embeddings


Expand Down
13 changes: 8 additions & 5 deletions src/neurostack/harvest.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,19 +215,22 @@ def _llm_classify(
)

try:
from .config import _auth_headers, get_config
resp = httpx.post(
f"{llm_url}/api/generate",
f"{llm_url}/v1/chat/completions",
headers=_auth_headers(get_config().llm_api_key),
json={
"model": llm_model,
"prompt": prompt,
"messages": [{"role": "user", "content": prompt}],
"stream": False,
"options": {"temperature": 0.1, "num_predict": 500},
"think": False,
"reasoning_effort": "none",
"temperature": 0.1,
"max_tokens": 500,
},
timeout=60.0,
)
resp.raise_for_status()
response = resp.json().get("response", "")
response = resp.json()["choices"][0]["message"]["content"]
# Strip think tags if present
response = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
except Exception as exc:
Expand Down
Loading
Loading