From 8d2b85b0b9e9ab8eed790bf728a39469ec434eaa Mon Sep 17 00:00:00 2001
From: geoffsee <>
Date: Sun, 31 Aug 2025 19:27:15 -0400
Subject: [PATCH] update docs

---
 .gitignore                         |   2 -
 README.md                          |  35 +++++-----
 crates/chat-ui/README.md           |  41 +++++++++++-
 crates/cli/README.md               |  11 ++--
 crates/embeddings-engine/README.md | 100 ++++++++++++++++++++++++++++-
 crates/helm-chart-tool/README.md   |   2 +-
 docs/ARCHITECTURE.md               |  11 ++--
 7 files changed, 167 insertions(+), 35 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4896b72..0ce0aea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -74,8 +74,6 @@ venv/
 # Backup files
 *.bak
 *.backup
-*~
-/scripts/cli
 !/scripts/cli.ts
 /**/.*.bun-build
 /AGENTS.md
diff --git a/README.md b/README.md
index 1aa8dd9..220e17a 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ The system supports both CPU and GPU acceleration (CUDA/Metal), with intelligent
 
 ### Workspace Structure
 
-The project uses a 7-crate Rust workspace plus TypeScript components:
+The project uses a 9-crate Rust workspace plus TypeScript components:
 
 ```
 crates/
@@ -51,17 +51,18 @@ crates/
 ├── gemma-runner/        # Gemma model inference via Candle (Rust 2021)
 ├── llama-runner/        # Llama model inference via Candle (Rust 2021)
 ├── embeddings-engine/   # FastEmbed embeddings service (Rust 2024)
-├── leptos-app/          # WASM web frontend (Rust 2021)
+├── chat-ui/             # WASM web frontend (Rust 2021)
 ├── helm-chart-tool/     # Kubernetes deployment tooling (Rust 2024)
-└── scripts/
-    └── cli.ts           # TypeScript/Bun CLI client
+└── cli/                 # CLI client crate (Rust 2024)
+    └── package/
+        └── cli.ts       # TypeScript/Bun CLI client
 ```
 
 ### Service Architecture
 
 - **Main Server** (port 8080): Orchestrates inference and embeddings services
 - **Embeddings Service** (port 8080): Standalone FastEmbed service with OpenAI API compatibility
-- **Web Frontend** (port 8788): cargo leptos SSR app
+- **Web Frontend** (port 8788): chat-ui WASM app
 - **CLI Client**: TypeScript/Bun client for testing and automation
 
 ### Deployment Modes
@@ -144,26 +145,26 @@ cargo build --bin embeddings-engine --release
 
 #### Web Frontend (Port 8788)
 ```bash
-cd crates/leptos-app
+cd crates/chat-ui
 ./run.sh
 ```
-- Serves Leptos WASM frontend on port 8788
+- Serves chat-ui WASM frontend on port 8788
 - Sets required RUSTFLAGS for WebAssembly getrandom support
 - Auto-reloads during development
 
 #### TypeScript CLI Client
 ```bash
 # List available models
-bun run scripts/cli.ts --list-models
+cd crates/cli/package && bun run cli.ts --list-models
 
 # Chat completion
-bun run scripts/cli.ts "What is the capital of France?"
+cd crates/cli/package && bun run cli.ts "What is the capital of France?"
 
 # With specific model
-bun run scripts/cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
+cd crates/cli/package && bun run cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
 
 # Show help
-bun run scripts/cli.ts --help
+cd crates/cli/package && bun run cli.ts --help
 ```
 
 ## API Usage
@@ -279,7 +280,7 @@ cargo test --workspace
 
 **End-to-end test script:**
 ```bash
-./smoke_test.sh
+./scripts/smoke_test.sh
 ```
 
 This script:
@@ -368,7 +369,7 @@ All services include Docker metadata in `Cargo.toml`:
 - Port: 8080
 
 **Web Frontend:**
-- Image: `ghcr.io/geoffsee/leptos-app:latest`
+- Image: `ghcr.io/geoffsee/chat-ui:latest`
 - Port: 8788
 
 **Docker Compose:**
@@ -427,7 +428,7 @@ For Kubernetes deployment details, see the [ARCHITECTURE.md](docs/ARCHITECTURE.m
 **Symptom:** WASM compilation failures
 **Solution:**
 1. Install required targets: `rustup target add wasm32-unknown-unknown`
-2. Check RUSTFLAGS in leptos-app/run.sh
+2. Check RUSTFLAGS in chat-ui/run.sh
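+
+   A typical setting looks like the sketch below (an assumption — `crates/chat-ui/run.sh` holds the authoritative flags; this presumes the getrandom 0.3 `wasm_js` backend):
+
+   ```bash
+   # Hypothetical example — verify against crates/chat-ui/run.sh
+   export RUSTFLAGS='--cfg getrandom_backend="wasm_js"'
+   cargo build --target wasm32-unknown-unknown
+   ```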
Check RUSTFLAGS in leptos-app/run.sh +2. Check RUSTFLAGS in chat-ui/run.sh ### Network/Timeout Issues **Symptom:** First-time model downloads timing out @@ -458,18 +459,18 @@ curl -s http://localhost:8080/v1/models | jq **CLI client test:** ```bash -bun run scripts/cli.ts "What is 2+2?" +cd crates/cli/package && bun run cli.ts "What is 2+2?" ``` **Web frontend:** ```bash -cd crates/leptos-app && ./run.sh & +cd crates/chat-ui && ./run.sh & # Navigate to http://localhost:8788 ``` **Integration test:** ```bash -./smoke_test.sh +./scripts/smoke_test.sh ``` **Cleanup:** diff --git a/crates/chat-ui/README.md b/crates/chat-ui/README.md index 63181d1..edb321e 100644 --- a/crates/chat-ui/README.md +++ b/crates/chat-ui/README.md @@ -1,2 +1,41 @@ # chat-ui -This is served by the predict-otron-9000 server. This needs to be built before the server. \ No newline at end of file + +A WASM-based web chat interface for the predict-otron-9000 AI platform. + +## Overview + +The chat-ui provides a real-time web interface for interacting with language models through the predict-otron-9000 server. Built with Leptos and compiled to WebAssembly, it offers a modern chat experience with streaming response support. + +## Features + +- Real-time chat interface with the inference server +- Streaming response support +- Conversation history +- Responsive web design +- WebAssembly-powered for optimal performance + +## Building and Running + +### Prerequisites +- Rust toolchain with WASM target: `rustup target add wasm32-unknown-unknown` +- The predict-otron-9000 server must be running on port 8080 + +### Development Server +```bash +cd crates/chat-ui +./run.sh +``` + +This starts the development server on port 8788 with auto-reload capabilities. + +### Usage +1. Start the predict-otron-9000 server: `./scripts/run_server.sh` +2. Start the chat-ui: `cd crates/chat-ui && ./run.sh` +3. Navigate to `http://localhost:8788` +4. Start chatting with your AI models! + +## Technical Details +- Built with Leptos framework +- Compiled to WebAssembly for browser execution +- Communicates with predict-otron-9000 API via HTTP +- Sets required RUSTFLAGS for WebAssembly getrandom support \ No newline at end of file diff --git a/crates/cli/README.md b/crates/cli/README.md index 0644108..f93bdab 100644 --- a/crates/cli/README.md +++ b/crates/cli/README.md @@ -3,7 +3,7 @@ A Rust/Typescript Hybrid ```console -./cli [options] [prompt] +bun run cli.ts [options] [prompt] Simple CLI tool for testing the local OpenAI-compatible API server. @@ -14,10 +14,11 @@ Options: --help Show this help message Examples: - ./cli "What is the capital of France?" - ./cli --model gemma-3-1b-it --prompt "Hello, world!" - ./cli --prompt "Who was the 16th president of the United States?" - ./cli --list-models + cd crates/cli/package + bun run cli.ts "What is the capital of France?" + bun run cli.ts --model gemma-3-1b-it --prompt "Hello, world!" + bun run cli.ts --prompt "Who was the 16th president of the United States?" + bun run cli.ts --list-models The server must be running at http://localhost:8080 ``` \ No newline at end of file diff --git a/crates/embeddings-engine/README.md b/crates/embeddings-engine/README.md index c47ea5a..2ad58b9 100644 --- a/crates/embeddings-engine/README.md +++ b/crates/embeddings-engine/README.md @@ -1,4 +1,100 @@ # Embeddings Engine -A high-performance text embeddings service that generates vector representations of text using state-of-the-art models. 
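+
+Under the hood, the UI talks to the server's OpenAI-compatible chat API. A minimal sketch of an equivalent request (endpoint and payload assumed from the platform's OpenAI compatibility, not taken from the UI source):
+
+```bash
+curl -s http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemma-3-1b-it",
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "stream": true
+  }'
+```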
+
+## Technical Details
+- Built with Leptos framework
+- Compiled to WebAssembly for browser execution
+- Communicates with predict-otron-9000 API via HTTP
+- Sets required RUSTFLAGS for WebAssembly getrandom support
\ No newline at end of file
diff --git a/crates/cli/README.md b/crates/cli/README.md
index 0644108..f93bdab 100644
--- a/crates/cli/README.md
+++ b/crates/cli/README.md
@@ -3,7 +3,7 @@
 
 A Rust/Typescript Hybrid
 
 ```console
-./cli [options] [prompt]
+bun run cli.ts [options] [prompt]
 
 Simple CLI tool for testing the local OpenAI-compatible API server.
@@ -14,10 +14,11 @@ Options:
   --help Show this help message
 
 Examples:
-  ./cli "What is the capital of France?"
-  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
-  ./cli --prompt "Who was the 16th president of the United States?"
-  ./cli --list-models
+  cd crates/cli/package
+  bun run cli.ts "What is the capital of France?"
+  bun run cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
+  bun run cli.ts --prompt "Who was the 16th president of the United States?"
+  bun run cli.ts --list-models
 
 The server must be running at http://localhost:8080
 ```
\ No newline at end of file
diff --git a/crates/embeddings-engine/README.md b/crates/embeddings-engine/README.md
index c47ea5a..2ad58b9 100644
--- a/crates/embeddings-engine/README.md
+++ b/crates/embeddings-engine/README.md
@@ -1,4 +1,100 @@
 # Embeddings Engine
 
-A high-performance text embeddings service that generates vector representations of text using state-of-the-art models.
-This crate wraps the fastembed crate to provide embeddings and partially adapts the openai specification.
\ No newline at end of file
+A high-performance text embeddings service that generates vector representations of text using state-of-the-art models. This crate wraps the FastEmbed library to provide embeddings with OpenAI-compatible API endpoints.
+
+## Overview
+
+The embeddings-engine provides a standalone service for generating text embeddings that can be used for semantic search, similarity comparisons, and other NLP tasks. It's designed to be compatible with OpenAI's embeddings API format.
+
+## Features
+
+- **OpenAI-Compatible API**: `/v1/embeddings` endpoint matching OpenAI's specification
+- **FastEmbed Integration**: Powered by the FastEmbed library for high-quality embeddings
+- **Multiple Model Support**: Support for various embedding models
+- **High Performance**: Optimized for fast embedding generation
+- **Standalone Service**: Can run independently or as part of the predict-otron-9000 platform
+
+## Building and Running
+
+### Prerequisites
+- Rust toolchain
+- Internet connection for initial model downloads
+
+### Standalone Server
+```bash
+cargo run --bin embeddings-engine --release
+```
+
+The service will start on port 8080 by default.
+
+## API Usage
+
+### Generate Embeddings
+
+**Endpoint**: `POST /v1/embeddings`
+
+**Request Body**:
+```json
+{
+  "input": "Your text to embed",
+  "model": "nomic-embed-text-v1.5"
+}
+```
+
+**Response**:
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "object": "embedding",
+      "index": 0,
+      "embedding": [0.1, 0.2, 0.3, ...]
+    }
+  ],
+  "model": "nomic-embed-text-v1.5",
+  "usage": {
+    "prompt_tokens": 0,
+    "total_tokens": 0
+  }
+}
+```
+
+### Example Usage
+
+**Using cURL**:
+```bash
+curl -s http://localhost:8080/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "input": "The quick brown fox jumps over the lazy dog",
+    "model": "nomic-embed-text-v1.5"
+  }' | jq
+```
+
+**Using Python OpenAI Client**:
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="dummy"  # Not validated but required by client
+)
+
+response = client.embeddings.create(
+    input="Your text here",
+    model="nomic-embed-text-v1.5"
+)
+
+print(response.data[0].embedding)
+```
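+
+**Batch input** (a sketch — array input follows OpenAI's embeddings convention; confirm the service accepts it):
+```bash
+curl -s http://localhost:8080/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{"input": ["first text", "second text"], "model": "nomic-embed-text-v1.5"}' | jq '.data | length'
+```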
+
+## Configuration
+
+The service can be configured through environment variables:
+- `SERVER_PORT`: Port to run on (default: 8080)
+- `RUST_LOG`: Logging level (default: info)
+
+## Integration
+
+This service is designed to work seamlessly with the predict-otron-9000 main server, but can also be deployed independently for dedicated embeddings workloads.
\ No newline at end of file
diff --git a/crates/helm-chart-tool/README.md b/crates/helm-chart-tool/README.md
index 58ee48d..f216d55 100644
--- a/crates/helm-chart-tool/README.md
+++ b/crates/helm-chart-tool/README.md
@@ -137,7 +137,7 @@ Parsing workspace at: ..
 Output directory: ../generated-helm-chart
 Chart name: predict-otron-9000
 Found 4 services:
-  - leptos-app: ghcr.io/geoffsee/leptos-app:latest (port 8788)
+  - chat-ui: ghcr.io/geoffsee/chat-ui:latest (port 8788)
   - inference-engine: ghcr.io/geoffsee/inference-service:latest (port 8080)
   - embeddings-engine: ghcr.io/geoffsee/embeddings-service:latest (port 8080)
   - predict-otron-9000: ghcr.io/geoffsee/predict-otron-9000:latest (port 8080)
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index b256389..44ffbc6 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -52,7 +52,7 @@ graph TB
 
 ## Workspace Structure
 
-The project uses a 7-crate Rust workspace with TypeScript tooling, designed for maximum flexibility in deployment configurations.
+The project uses a 9-crate Rust workspace with TypeScript tooling, designed for maximum flexibility in deployment configurations.
 
 ```mermaid
 graph TD
@@ -69,18 +69,15 @@ graph TD
     end
 
     subgraph "Frontend"
-        D[leptos-app<br/>Edition: 2021<br/>Port: 3000/8788<br/>WASM/SSR]
+        D[chat-ui<br/>Edition: 2021<br/>Port: 8788<br/>WASM UI]
     end
 
     subgraph "Tooling"
         L[helm-chart-tool<br/>Edition: 2024<br/>K8s deployment]
+        E[cli<br/>Edition: 2024<br/>TypeScript/Bun CLI]
     end
     end
 
-    subgraph "External Tooling"
-        E[scripts/cli.ts<br/>TypeScript/Bun<br/>OpenAI SDK]
-    end
-
     subgraph "Dependencies"
         A --> B
         A --> C
@@ -193,7 +190,7 @@ graph TB
     end
 
     subgraph "Frontend"
-        D[leptos-app Pod<br/>:8788<br/>ClusterIP Service]
+        D[chat-ui Pod<br/>:8788<br/>ClusterIP Service]
     end
 
     subgraph "Ingress"