From 4dba651237af7f214cf35d296d4b19b78007c8bf Mon Sep 17 00:00:00 2001
From: "novgorodcev.k"
Date: Sun, 29 Mar 2026 20:06:41 +0300
Subject: [PATCH] front

---
Review notes (not part of the commit message):
- front/app/app.view.tree no longer ships GitHub tokens as default values;
  the tokens that were committed here must be revoked.
- server/main.py was revised; blob ids on the "index" lines of the changed
  files were not recomputed.

 .env.example              |  11 ++
 .gitattributes            |   6 ++
 .gitignore                |   6 +-
 Dockerfile                |  33 ++++++
 docker-compose.yml        |  43 ++++++++
 front/app/app.meta.tree   |   1 +
 front/app/app.view.css.ts |  42 ++++++++
 front/app/app.view.tree   |  94 ++++++++++++++++
 front/app/app.view.ts     | 149 +++++++++++++++++++++++++
 front/app/index.html      |  14 +++
 server/main.py            | 295 ++++++++++++++++++++++++++++++++++++++
 server/requirements.txt   |   4 +
 12 files changed, 697 insertions(+), 1 deletion(-)
 create mode 100644 .env.example
 create mode 100644 .gitattributes
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml
 create mode 100644 front/app/app.meta.tree
 create mode 100644 front/app/app.view.css.ts
 create mode 100644 front/app/app.view.tree
 create mode 100644 front/app/app.view.ts
 create mode 100644 front/app/index.html
 create mode 100644 server/main.py
 create mode 100644 server/requirements.txt

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..117d133
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,11 @@
+LLM_API_KEY=github_pat_...
+LLM_BASE_URL=https://models.github.ai/inference
+LLM_MODEL=openai/gpt-4.1-mini
+LLM_RPM=60
+
+EMBEDDER_API_KEY=github_pat_...
+EMBEDDER_BASE_URL=https://models.github.ai/inference
+EMBEDDER_MODEL=text-embedding-3-large
+EMBEDDER_DIM=3072
+
+RAGU_STORAGE=ragu_data
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a2da00f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+* -text
+*.woff binary
+*.woff2 binary
+*.ttf binary
+*.eot binary
+*.otf binary
diff --git a/.gitignore b/.gitignore
index c3e69b5..d7c3f2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# $mol
+-*
+.DS_Store
+
 checkpoints/
 benchmark/*.json
 ragu_working_dir/
@@ -175,4 +179,4 @@ cython_debug/
 .ruff_cache/
 
 # PyPI configuration file
-.pypirc
\ No newline at end of file
+.pypirc
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c7b74e0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,33 @@
+# --- Frontend ---
+FROM node:20-alpine AS frontend
+
+RUN apk add --no-cache git
+
+WORKDIR /app
+RUN git clone --depth 1 https://github.com/hyoo-ru/mam.git . \
+    && npm install
+
+COPY front/ bog/RAGU/front/
+
+RUN npx mam bog/RAGU/front/app
+
+EXPOSE 9080
+
+CMD ["npm", "start"]
+
+
+# --- API ---
+FROM python:3.12-slim AS api
+
+WORKDIR /app
+
+COPY pyproject.toml ./
+COPY ragu/ ./ragu/
+RUN pip install --no-cache-dir .
+
+COPY server/ ./server/
+RUN pip install --no-cache-dir -r server/requirements.txt
+
+EXPOSE 8000
+
+CMD ["uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..3b18626
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,43 @@
+services:
+  web:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: frontend
+    ports:
+      - "9081:9080"
+    restart: unless-stopped
+    tty: true
+    stdin_open: true
+    develop:
+      watch:
+        - action: sync
+          path: ./front
+          target: /app/bog/RAGU/front
+
+  # http://localhost:9081/bog/RAGU/front/app/-/test.html
+
+  api:
+    platform: linux/arm64
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: api
+    ports:
+      - "8100:8000"
+    restart: unless-stopped
+    env_file: .env
+    environment:
+      - NUMBA_CPU_NAME=generic
+    volumes:
+      - ragu_data:/app/ragu_data
+    develop:
+      watch:
+        - action: sync+restart
+          path: ./server
+          target: /app/server
+
+  # http://localhost:8100/api/status
+
+volumes:
+  ragu_data:
diff --git a/front/app/app.meta.tree b/front/app/app.meta.tree
new file mode 100644
index 0000000..49489eb
--- /dev/null
+++ b/front/app/app.meta.tree
@@ -0,0 +1 @@
+include \/mol/offline/install
diff --git a/front/app/app.view.css.ts b/front/app/app.view.css.ts
new file mode 100644
index 0000000..712a8f9
--- /dev/null
+++ b/front/app/app.view.css.ts
@@ -0,0 +1,42 @@
+namespace $.$$ {
+
+	$mol_style_define( $bog_RAGU_front_app, {
+
+		Documents: {
+			flex: {
+				basis: '30rem',
+				grow: 1,
+			},
+			Body: {
+				flex: {
+					grow: 1,
+				},
+			},
+		},
+
+		Doc_text: {
+			flex: {
+				grow: 1,
+			},
+			minHeight: '20rem',
+		},
+
+		Doc_file: {
+			alignItems: 'center',
+			gap: '.5rem',
+		},
+
+		Index_record: {
+			alignItems: 'center',
+			gap: '.5rem',
+		},
+
+		Settings_page: {
+			flex: {
+				basis: '25rem',
+			},
+		},
+
+	} )
+
+}
diff --git a/front/app/app.view.tree b/front/app/app.view.tree
new file mode 100644
index 0000000..38f13d3
--- /dev/null
+++ b/front/app/app.view.tree
@@ -0,0 +1,94 @@
+$bog_RAGU_front_app $giper_bot
+	dialog_title @ \RAGU
+	api_url \http://localhost:8100
+	history? /
+	doc_text? \
+	doc_files? /
+	index_message? \
+	index_records? /
+	config_message? \
+	llm_api_key? \
+	llm_base_url? \https://models.github.ai/inference
+	llm_model? \openai/gpt-4.1-mini
+	llm_rpm? \60
+	embedder_api_key? \
+	embedder_base_url? \https://models.github.ai/inference
+	embedder_model? \text-embedding-3-large
+	embedder_dim? \3072
+	Doc_file* $mol_row
+		sub /
+			<= Doc_file_icon* $mol_icon_file
+			<= Doc_file_name* $mol_view
+				sub /
+					<= doc_file_name* \
+			<= Doc_file_remove* $mol_button_minor
+				click? <=> doc_file_remove*? null
+				sub /
+					<= Doc_file_remove_icon* $mol_icon_close
+	Index_record* $mol_row
+		sub /
+			<= Index_record_icon* $mol_icon_check
+			<= Index_record_info* $mol_view
+				sub /
+					<= index_record_text* \
+	Documents $mol_page
+		title @ \Documents
+		body /
+			<= Doc_text $mol_textarea
+				hint @ \Paste text to build knowledge graph...
+				value? <=> doc_text?
+			<= Doc_open $mol_button_open
+				title @ \Add Files
+				accept \.txt,.md,.csv,.json,.xml,.html,.docx
+				files? <=> doc_files_add? null
+			<= Doc_file_list $mol_list
+				rows <= doc_file_rows /
+			<= Index_submit $mol_button_major
+				title @ \Build Knowledge Graph
+				click? <=> index_submit? null
+			<= Index_message $mol_text
+				text <= index_message?
+			<= Index_record_list $mol_list
+				rows <= index_record_rows /
+	Settings_page $mol_page
+		title @ \Settings
+		body /
+			<= Llm_api_key $mol_form_field
+				name @ \LLM API Key
+				Content <= Llm_api_key_input $mol_string
+					hint \sk-...
+					value? <=> llm_api_key?
+			<= Llm_base_url $mol_form_field
+				name @ \LLM Base URL
+				Content <= Llm_base_url_input $mol_string
+					value? <=> llm_base_url?
+			<= Llm_model $mol_form_field
+				name @ \LLM Model
+				Content <= Llm_model_input $mol_string
+					value? <=> llm_model?
+			<= Llm_rpm $mol_form_field
+				name @ \LLM RPM
+				Content <= Llm_rpm_input $mol_string
+					value? <=> llm_rpm?
+			<= Embedder_api_key $mol_form_field
+				name @ \Embedder API Key
+				Content <= Embedder_api_key_input $mol_string
+					hint \sk-...
+					value? <=> embedder_api_key?
+			<= Embedder_base_url $mol_form_field
+				name @ \Embedder Base URL
+				Content <= Embedder_base_url_input $mol_string
+					value? <=> embedder_base_url?
+			<= Embedder_model $mol_form_field
+				name @ \Embedder Model
+				Content <= Embedder_model_input $mol_string
+					value? <=> embedder_model?
+			<= Embedder_dim $mol_form_field
+				name @ \Embedder Dim
+				Content <= Embedder_dim_input $mol_string
+					value? <=> embedder_dim?
+			<= Config_save $mol_button_major
+				title @ \Save
+				click? <=> config_save? null
+			<= Config_message $mol_text
+				text <= config_message?
diff --git a/front/app/app.view.ts b/front/app/app.view.ts
new file mode 100644
index 0000000..c413099
--- /dev/null
+++ b/front/app/app.view.ts
@@ -0,0 +1,149 @@
+namespace $.$$ {
+
+	type Request = {
+		message: string
+		files: string[]
+	}
+
+	type IndexRecord = {
+		count: number
+		names: string[]
+	}
+
+	export class $bog_RAGU_front_app extends $.$bog_RAGU_front_app {
+
+		@ $mol_mem
+		config_synced() {
+			this.push_config()
+			return true
+		}
+
+		@ $mol_mem
+		override pages() {
+			this.config_synced()
+			return [
+				this.Settings_page(),
+				this.Documents(),
+				this.Dialog(),
+				... this.result() ? [ this.Result_page( this.version() ) ] : [],
+			]
+		}
+
+		@ $mol_action
+		override doc_files_add( next: readonly File[] ) {
+			if( !next?.length ) return
+			this.doc_files([ ... this.doc_files(), ... next ])
+		}
+
+		override doc_file_rows() {
+			return this.doc_files().map( ( _, i ) => this.Doc_file( i ) )
+		}
+
+		override doc_file_name( index: number ) {
+			return ( this.doc_files()[ index ] as File ).name
+		}
+
+		@ $mol_action
+		override doc_file_remove( index: number ) {
+			const files = [ ... this.doc_files() ]
+			files.splice( index, 1 )
+			this.doc_files( files )
+		}
+
+		override index_record_rows() {
+			return this.index_records().map( ( _, i ) => this.Index_record( i ) )
+		}
+
+		override index_record_text( index: number ) {
+			const rec = this.index_records()[ index ] as IndexRecord
+			return `${ rec.count } doc(s): ${ rec.names.join( ', ' ) }`
+		}
+
+		@ $mol_mem
+		override communication() {
+
+			const history = this.history()
+			if( history.length % 2 === 0 ) return
+
+			const last = history[ history.length - 1 ] as Request
+
+			try {
+				const resp = $mol_fetch.json(
+					this.api_url() + '/api/query',
+					{
+						method: 'POST',
+						headers: { 'Content-Type': 'application/json' },
+						body: JSON.stringify({ query: last.message }),
+					},
+				)
+				this.history([ ... history, resp ])
+			} catch( error: any ) {
+				if( $mol_promise_like( error ) ) $mol_fail_hidden( error )
+				if( $mol_fail_log( error ) ) {
+					this.history([ ... history, { message: '\u{1F6D1}' + error.message, files: [] } ])
+				}
+			}
+
+		}
+
+		@ $mol_action
+		override index_submit() {
+			const text = this.doc_text()
+			const files = this.doc_files() as File[]
+
+			if( !text && !files.length ) return
+
+			const form = new FormData()
+			if( text ) form.append( 'text', text )
+			for( const file of files ) {
+				form.append( 'files', file )
+			}
+
+			const resp = $mol_fetch.json(
+				this.api_url() + '/api/index',
+				{
+					method: 'POST',
+					body: form,
+				},
+			) as { status: string; documents_count: number; names: string[]; total_documents: number }
+
+			this.index_records([
+				... this.index_records(),
+				{ count: resp.documents_count, names: resp.names } as IndexRecord,
+			])
+
+			this.index_message( `Indexed ${ resp.documents_count } doc(s). Total: ${ resp.total_documents }` )
+			this.doc_text( '' )
+			this.doc_files( [] )
+		}
+
+		@ $mol_action
+		push_config() {
+			$mol_fetch.json(
+				this.api_url() + '/api/config',
+				{
+					method: 'POST',
+					headers: { 'Content-Type': 'application/json' },
+					body: JSON.stringify({ env: {
+						LLM_API_KEY: this.llm_api_key(),
+						LLM_BASE_URL: this.llm_base_url(),
+						LLM_MODEL: this.llm_model(),
+						LLM_RPM: this.llm_rpm(),
+						EMBEDDER_API_KEY: this.embedder_api_key(),
+						EMBEDDER_BASE_URL: this.embedder_base_url(),
+						EMBEDDER_MODEL: this.embedder_model(),
+						EMBEDDER_DIM: this.embedder_dim(),
+					} }),
+				},
+			)
+		}
+
+		@ $mol_action
+		override config_save() {
+			this.push_config()
+			this.config_message( 'Saved' )
+		}
+
+	}
+
+}
diff --git a/front/app/index.html b/front/app/index.html
new file mode 100644
index 0000000..26dd154
--- /dev/null
+++ b/front/app/index.html
@@ -0,0 +1,14 @@
+
+
+
+
+ RAGU
+
+
+
+
+
+
+
+
+
diff --git a/server/main.py b/server/main.py
new file mode 100644
index 0000000..5a00fa3
--- /dev/null
+++ b/server/main.py
@@ -0,0 +1,295 @@
+"""HTTP API for building and querying a RAGU knowledge graph."""
+
+import io
+import os
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI, UploadFile, File, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+
+from ragu import (
+    KnowledgeGraph,
+    SimpleChunker,
+    BuilderArguments,
+    LocalSearchEngine,
+    GlobalSearchEngine,
+    NaiveSearchEngine,
+    Settings,
+)
+from ragu.llm import OpenAIClient
+from ragu.embedder import OpenAIEmbedder
+from ragu.triplet import ArtifactsExtractorLLM
+
+
+# Environment variables that /api/config may read and update.
+ENV_KEYS = [
+    "LLM_API_KEY", "LLM_BASE_URL", "LLM_MODEL", "LLM_RPM",
+    "EMBEDDER_API_KEY", "EMBEDDER_BASE_URL", "EMBEDDER_MODEL", "EMBEDDER_DIM",
+    "RAGU_STORAGE",
+]
+
+# Credentials: never echoed back verbatim by the config endpoints.
+SECRET_KEYS = ("LLM_API_KEY", "EMBEDDER_API_KEY")
+
+# Placeholder returned in place of a configured credential.
+MASK = "***"
+
+
+class State:
+    """Process-wide mutable state shared by all request handlers."""
+
+    def __init__(self) -> None:
+        # Lists are created per instance; as class attributes they would be
+        # shared by every State object.
+        self.kg: KnowledgeGraph | None = None
+        self.client: OpenAIClient | None = None
+        self.embedder: OpenAIEmbedder | None = None
+        self.indexed: bool = False
+        self.all_documents: list[str] = []
+        self.all_names: list[str] = []
+
+
+state = State()
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Create the LLM and embedder clients on startup; close them on shutdown."""
+    reinit_clients()
+    yield
+    if state.client:
+        await state.client.async_close()
+    if state.embedder:
+        await state.embedder.aclose()
+
+
+app = FastAPI(title="RAGU API", lifespan=lifespan)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+class QueryRequest(BaseModel):
+    """Body of POST /api/query."""
+
+    query: str
+    engine: str = "local"
+    top_k: int = 20
+
+
+class BotResponse(BaseModel):
+    """Reply of POST /api/query."""
+
+    message: str
+    files: list[str] = []
+    document: str | None = None
+    confidence: float = 0.0
+    digest: str = ""
+    title: str = ""
+
+
+class ConfigUpdate(BaseModel):
+    """Body of POST /api/config: variable names mapped to new values."""
+
+    env: dict[str, str]
+
+
+def _env_int(name: str, default: int) -> int:
+    """Return environment variable *name* as an int, or *default* if unusable."""
+    try:
+        return int(os.getenv(name, ""))
+    except ValueError:
+        return default
+
+
+def _public_config() -> dict[str, str]:
+    """Return the ENV_KEYS settings with configured credentials masked."""
+    config = {k: os.getenv(k, "") for k in ENV_KEYS}
+    for k in SECRET_KEYS:
+        if config[k]:
+            config[k] = MASK
+    return config
+
+
+def reinit_clients():
+    """(Re)create the LLM client and the embedder from the environment.
+
+    Existing clients are replaced without being closed, because a knowledge
+    graph built earlier was handed those objects and may still use them.
+    """
+    state.client = OpenAIClient(
+        model_name=os.getenv("LLM_MODEL", ""),
+        base_url=os.getenv("LLM_BASE_URL", ""),
+        api_token=os.getenv("LLM_API_KEY", ""),
+        # A non-numeric value (e.g. from the settings form) uses the default.
+        max_requests_per_minute=_env_int("LLM_RPM", 60),
+    )
+    state.embedder = OpenAIEmbedder(
+        model_name=os.getenv("EMBEDDER_MODEL", ""),
+        # Unset or blank embedder settings fall back to the LLM ones.
+        base_url=os.getenv("EMBEDDER_BASE_URL") or os.getenv("LLM_BASE_URL", ""),
+        api_token=os.getenv("EMBEDDER_API_KEY") or os.getenv("LLM_API_KEY", ""),
+        dim=_env_int("EMBEDDER_DIM", 3072),
+    )
+
+
+@app.get("/api/status")
+async def get_status():
+    """Report whether a knowledge graph has been built."""
+    return {"indexed": state.indexed}
+
+
+@app.get("/api/config")
+async def get_config():
+    """Return the current configuration; credentials are masked."""
+    return _public_config()
+
+
+@app.post("/api/config")
+async def set_config(req: ConfigUpdate):
+    """Apply configuration overrides and recreate the clients.
+
+    Keys outside ENV_KEYS are ignored. Blank values and the mask placeholder
+    are skipped as well, so a caller that has no credential to offer, or that
+    echoes back a masked one, does not overwrite the value already in effect.
+    """
+    for k, v in req.env.items():
+        if k in ENV_KEYS and v.strip() and v != MASK:
+            os.environ[k] = v
+    reinit_clients()
+    return _public_config()
+
+
+async def _read_file(upload: UploadFile) -> str:
+    """Return the text of an uploaded file.
+
+    ``.docx`` uploads are parsed with python-docx. Anything else is decoded as
+    UTF-8: a leading BOM is dropped and undecodable bytes are replaced, so a
+    badly encoded upload does not abort the whole request.
+    """
+    content = await upload.read()
+
+    if (upload.filename or "").lower().endswith(".docx"):
+        from docx import Document as DocxDocument
+        doc = DocxDocument(io.BytesIO(content))
+        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
+
+    return content.decode("utf-8-sig", errors="replace")
+
+
+@app.post("/api/index")
+async def index_documents(
+    files: list[UploadFile] = File(default=[]),
+    text: str = Form(default=""),
+    language: str = Form(default="russian"),
+):
+    """Add documents to the corpus and rebuild the knowledge graph.
+
+    The graph is rebuilt from all previously accepted documents plus the new
+    ones. Shared state is updated only after a successful build, so a failed
+    request leaves the earlier corpus and graph as they were.
+    """
+    documents: list[str] = []
+    names: list[str] = []
+
+    if text.strip():
+        documents.append(text.strip())
+        names.append("(text)")
+
+    for f in files:
+        doc_text = await _read_file(f)
+        if doc_text.strip():
+            documents.append(doc_text)
+            names.append(f.filename or "unknown")
+
+    if not documents:
+        return {
+            "status": "empty",
+            "documents_count": 0,
+            "names": [],
+            "total_documents": len(state.all_documents),
+        }
+
+    # Build from copies. The build awaits, so other requests can interleave;
+    # extending the shared lists first and slicing them back on failure could
+    # drop documents that a different request had added in the meantime.
+    all_documents = state.all_documents + documents
+    all_names = state.all_names + names
+
+    try:
+        Settings.storage_folder = os.getenv("RAGU_STORAGE", "ragu_data")
+        Settings.language = language
+
+        chunker = SimpleChunker(max_chunk_size=1000)
+        extractor = ArtifactsExtractorLLM(client=state.client, do_validation=False)
+
+        kg = KnowledgeGraph(
+            client=state.client,
+            embedder=state.embedder,
+            chunker=chunker,
+            artifact_extractor=extractor,
+            builder_settings=BuilderArguments(
+                use_llm_summarization=True,
+                vectorize_chunks=True,
+            ),
+        )
+
+        await kg.build_from_docs(all_documents)
+
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": str(e), "names": names, "documents_count": 0},
+        )
+
+    state.kg = kg
+    state.indexed = True
+    state.all_documents = all_documents
+    state.all_names = all_names
+
+    return {
+        "status": "indexed",
+        "documents_count": len(documents),
+        "names": names,
+        "total_documents": len(state.all_documents),
+    }
+
+
+@app.post("/api/query", response_model=BotResponse)
+async def query_graph(req: QueryRequest):
+    """Answer a question with the selected search engine.
+
+    Failures are reported in the ``message`` field of a normal response rather
+    than as an HTTP error.
+    """
+    if not state.kg:
+        return BotResponse(
+            message="Knowledge graph not built yet. Please index documents first.",
+        )
+
+    try:
+        if req.engine == "local":
+            engine = LocalSearchEngine(state.client, state.kg, state.embedder)
+            answer = await engine.a_query(req.query, top_k=req.top_k)
+        elif req.engine == "global":
+            engine = GlobalSearchEngine(state.client, state.kg)
+            answer = await engine.a_query(req.query)
+        elif req.engine == "naive":
+            engine = NaiveSearchEngine(state.client, state.kg, state.embedder)
+            answer = await engine.a_query(req.query, top_k=req.top_k)
+        else:
+            return BotResponse(message=f"Unknown engine: {req.engine}")
+    except Exception as e:
+        return BotResponse(message=f"Error: {e}")
+
+    return BotResponse(
+        message=answer,
+        confidence=0.8,
+        title=req.query[:50],
+    )
diff --git a/server/requirements.txt b/server/requirements.txt
new file mode 100644
index 0000000..f6b3ebf
--- /dev/null
+++ b/server/requirements.txt
@@ -0,0 +1,4 @@
+fastapi
+uvicorn
+python-docx
+python-multipart