diff --git a/Cargo.lock b/Cargo.lock index 4f04282..2bfd367 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2351,6 +2351,57 @@ version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +[[package]] +name = "inference-engine" +version = "0.1.0" +dependencies = [ + "ab_glyph", + "accelerate-src", + "anyhow", + "axum 0.7.9", + "bindgen_cuda", + "byteorder", + "candle-core", + "candle-datasets", + "candle-flash-attn", + "candle-nn", + "candle-onnx", + "candle-transformers", + "clap", + "cpal", + "csv", + "cudarc", + "either", + "enterpolation", + "half", + "hf-hub", + "image", + "imageproc", + "intel-mkl-src", + "memmap2", + "num-traits", + "palette", + "pdf2image", + "pyo3", + "rand 0.9.1", + "rayon", + "reborrow", + "rubato", + "safetensors", + "serde", + "serde_json", + "symphonia", + "tokenizers", + "tokio", + "tower 0.4.13", + "tower-http 0.5.2", + "tracing", + "tracing-chrome", + "tracing-subscriber", + "utoipa", + "uuid", +] + [[package]] name = "instant" version = "0.1.13" @@ -2620,57 +2671,6 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" -[[package]] -name = "local_inference_engine" -version = "0.1.0" -dependencies = [ - "ab_glyph", - "accelerate-src", - "anyhow", - "axum 0.7.9", - "bindgen_cuda", - "byteorder", - "candle-core", - "candle-datasets", - "candle-flash-attn", - "candle-nn", - "candle-onnx", - "candle-transformers", - "clap", - "cpal", - "csv", - "cudarc", - "either", - "enterpolation", - "half", - "hf-hub", - "image", - "imageproc", - "intel-mkl-src", - "memmap2", - "num-traits", - "palette", - "pdf2image", - "pyo3", - "rand 0.9.1", - "rayon", - "reborrow", - "rubato", - "safetensors", - "serde", - "serde_json", - "symphonia", - "tokenizers", - "tokio", - "tower 0.4.13", - "tower-http 0.5.2", - "tracing", - "tracing-chrome", - "tracing-subscriber", - "utoipa", - "uuid", -] - [[package]] name = "lock_api" version = "0.4.12" diff --git a/Cargo.toml b/Cargo.toml index 4a9200d..ecdbefa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [workspace] members = [ "crates/agent-server", - "crates/local_inference_engine", + "crates/inference-engine", ] resolver = "2" diff --git a/README.md b/README.md index d12aa7b..8b02445 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,17 @@ # open-web-agent-rs -A Rust-based web agent with an embedded OpenAI-compatible inference server (supports Gemma models only). +A Rust-based web agent with an embedded OpenAI-compatible inference server (supports Gemma models only). It is packaged and deployed as a container. 
## Project Structure This project is organized as a Cargo workspace with the following crates: -- `agent-server`: The main web agent server -- `local_inference_engine`: An embedded OpenAI-compatible inference server for Gemma models +- `crates` + - [agent-server](crates/agent-server): The main web agent server + - [inference-engine](crates/inference-engine): An embedded OpenAI-compatible inference server for Gemma models +- `packages` + - [genaiscript](packages/genaiscript): GenaiScript scripts + - [genaiscript-rust-shim](packages/genaiscript-rust-shim): A shim for running GenaiScript scripts from the Rust agent server ## Acknowledgements @@ -61,9 +65,9 @@ flowchart LR end %% ─────────────── Local inference ─────────────── - subgraph INFERENCE["Local Inference Engine"] + subgraph INFERENCE["Inference Engine"] direction TB - LIE["Local Inference Engine"]:::core -->|loads| MODELS["Gemma Models"]:::model + LIE["Inference Engine"]:::core -->|loads| MODELS["Gemma Models"]:::model LIE -->|exposes| OPENAI_API["OpenAI‑compatible API"]:::api MODELS -->|runs on| ACCEL @@ -121,12 +125,12 @@ flowchart LR ## Running the Project -### Local Inference Engine +### Inference Engine To run the local inference engine: ```bash -cd crates/local_inference_engine +cd crates/inference-engine cargo run --release -- --server ``` @@ -159,5 +163,5 @@ To build a specific crate: ```bash cargo build -p agent-server # or -cargo build -p local_inference_engine +cargo build -p inference-engine ``` diff --git a/crates/agent-server/src/agents/deep_research.rs b/crates/agent-server/src/agents/deep_research.rs index 299742c..091b5a2 100644 --- a/crates/agent-server/src/agents/deep_research.rs +++ b/crates/agent-server/src/agents/deep_research.rs @@ -13,6 +13,7 @@ mod tests { use std::fmt::Debug; #[tokio::test] + #[ignore] async fn test_deepresearch() { // a really provocative question for research that generally yields infinite complexity with each run let input = "What is a life of meaning?"; diff --git a/crates/agent-server/src/agents/search.rs b/crates/agent-server/src/agents/search.rs index da2af98..bacdffe 100644 --- a/crates/agent-server/src/agents/search.rs +++ b/crates/agent-server/src/agents/search.rs @@ -12,7 +12,8 @@ mod tests { use std::fmt::Debug; use crate::agents::search::agent; - #[tokio::test] + #[tokio::test] + #[ignore] async fn test_search_execution() { let input = "Who won the 2024 presidential election?"; diff --git a/crates/local_inference_engine/Cargo.lock b/crates/inference-engine/Cargo.lock similarity index 100% rename from crates/local_inference_engine/Cargo.lock rename to crates/inference-engine/Cargo.lock diff --git a/crates/local_inference_engine/Cargo.toml b/crates/inference-engine/Cargo.toml similarity index 98% rename from crates/local_inference_engine/Cargo.toml rename to crates/inference-engine/Cargo.toml index 2f083cd..4a3257d 100644 --- a/crates/local_inference_engine/Cargo.toml +++ b/crates/inference-engine/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "local_inference_engine" +name = "inference-engine" version = "0.1.0" edition = "2021" diff --git a/crates/local_inference_engine/README.md b/crates/inference-engine/README.md similarity index 95% rename from crates/local_inference_engine/README.md rename to crates/inference-engine/README.md index 2b72c03..f464b81 100644 --- a/crates/local_inference_engine/README.md +++ b/crates/inference-engine/README.md @@ -1,4 +1,4 @@ -# Local Inference Engine +# @open-web-agent-rs/inference-engine A Rust-based inference engine for running large language
models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API. @@ -30,7 +30,7 @@ A Rust-based inference engine for running large language models locally. This to 2. Build the local inference engine: ```bash - cargo build -p local_inference_engine --release + cargo build -p inference-engine --release ``` ## Usage @@ -40,7 +40,7 @@ A Rust-based inference engine for running large language models locally. This to Run the inference engine in CLI mode to generate text directly: ```bash -cargo run -p local_inference_engine --release -- --prompt "Your prompt text here" --which 3-1b-it +cargo run -p inference-engine --release -- --prompt "Your prompt text here" --which 3-1b-it ``` #### CLI Options @@ -62,7 +62,7 @@ cargo run -p local_inference_engine --release -- --prompt "Your prompt text here Run the inference engine in server mode to expose an OpenAI-compatible API: ```bash -cargo run -p local_inference_engine --release -- --server --port 3777 --which 3-1b-it +cargo run -p inference-engine --release -- --server --port 3777 --which 3-1b-it ``` This starts a web server on the specified port (default: 3777) with an OpenAI-compatible chat completions endpoint. diff --git a/crates/local_inference_engine/api_test.html b/crates/inference-engine/api_test.html similarity index 100% rename from crates/local_inference_engine/api_test.html rename to crates/inference-engine/api_test.html diff --git a/crates/local_inference_engine/openai_api_test.js b/crates/inference-engine/openai-api-test.js similarity index 98% rename from crates/local_inference_engine/openai_api_test.js rename to crates/inference-engine/openai-api-test.js index 016dabc..51f2e3e 100644 --- a/crates/local_inference_engine/openai_api_test.js +++ b/crates/inference-engine/openai-api-test.js @@ -173,4 +173,4 @@ })(); console.log("\nAll test requests have been sent. Check the server logs for more details."); -console.log("To run the server, use: cargo run --bin local_inference_engine -- --server"); +console.log("To run the server, use: cargo run --bin inference-engine -- --server"); diff --git a/crates/local_inference_engine/src/cli.rs b/crates/inference-engine/src/cli.rs similarity index 100% rename from crates/local_inference_engine/src/cli.rs rename to crates/inference-engine/src/cli.rs diff --git a/crates/local_inference_engine/src/lib.rs b/crates/inference-engine/src/lib.rs similarity index 100% rename from crates/local_inference_engine/src/lib.rs rename to crates/inference-engine/src/lib.rs diff --git a/crates/local_inference_engine/src/main.rs b/crates/inference-engine/src/main.rs similarity index 99% rename from crates/local_inference_engine/src/main.rs rename to crates/inference-engine/src/main.rs index bfbdb03..b945371 100644 --- a/crates/local_inference_engine/src/main.rs +++ b/crates/inference-engine/src/main.rs @@ -246,7 +246,7 @@ async fn chat_completions( StatusCode::BAD_REQUEST, Json(serde_json::json!({ "error": { - "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"", + "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. 
Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"", "type": "unsupported_api" } })), @@ -292,6 +292,7 @@ use candle_core::{DType, Device, MetalDevice, Tensor}; use candle_nn::VarBuilder; use candle_transformers::generation::LogitsProcessor; use hf_hub::{Repo, RepoType, api::sync::Api}; +use serde_json::json; use tokenizers::Tokenizer; use crate::token_output_stream::TokenOutputStream; use crate::utilities_lib::device; @@ -596,7 +597,7 @@ impl TextGeneration { logits } else { let start_at = tokens.len().saturating_sub(self.repeat_last_n); - + // Manual implementation of repeat penalty to avoid type conflicts let mut logits_vec = logits.to_vec1::<f32>()?; diff --git a/crates/local_inference_engine/src/model.rs b/crates/inference-engine/src/model.rs similarity index 100% rename from crates/local_inference_engine/src/model.rs rename to crates/inference-engine/src/model.rs diff --git a/crates/local_inference_engine/src/openai_types.rs b/crates/inference-engine/src/openai_types.rs similarity index 100% rename from crates/local_inference_engine/src/openai_types.rs rename to crates/inference-engine/src/openai_types.rs diff --git a/crates/local_inference_engine/src/server.rs b/crates/inference-engine/src/server.rs similarity index 97% rename from crates/local_inference_engine/src/server.rs rename to crates/inference-engine/src/server.rs index e1ee272..d80e7d6 100644 --- a/crates/local_inference_engine/src/server.rs +++ b/crates/inference-engine/src/server.rs @@ -67,7 +67,7 @@ pub async fn chat_completions( StatusCode::BAD_REQUEST, Json(serde_json::json!({ "error": { - "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"", + "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. 
Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"", "type": "unsupported_api" } })), diff --git a/crates/local_inference_engine/src/text_generation.rs b/crates/inference-engine/src/text_generation.rs similarity index 100% rename from crates/local_inference_engine/src/text_generation.rs rename to crates/inference-engine/src/text_generation.rs diff --git a/crates/local_inference_engine/src/token_output_stream.rs b/crates/inference-engine/src/token_output_stream.rs similarity index 100% rename from crates/local_inference_engine/src/token_output_stream.rs rename to crates/inference-engine/src/token_output_stream.rs diff --git a/crates/local_inference_engine/src/utilities_lib.rs b/crates/inference-engine/src/utilities_lib.rs similarity index 100% rename from crates/local_inference_engine/src/utilities_lib.rs rename to crates/inference-engine/src/utilities_lib.rs diff --git a/crates/local_inference_engine/test.sh b/crates/inference-engine/test.sh similarity index 100% rename from crates/local_inference_engine/test.sh rename to crates/inference-engine/test.sh diff --git a/crates/local_inference_engine/tests/model_tests.rs b/crates/inference-engine/tests/model_tests.rs similarity index 98% rename from crates/local_inference_engine/tests/model_tests.rs rename to crates/inference-engine/tests/model_tests.rs index 3b7f528..1ff6cad 100644 --- a/crates/local_inference_engine/tests/model_tests.rs +++ b/crates/inference-engine/tests/model_tests.rs @@ -1,4 +1,4 @@ -use local_inference_engine::model::{Model, Which}; +use inference_engine::model::{Model, Which}; #[cfg(test)] mod tests { diff --git a/crates/local_inference_engine/tests/text_generation_tests.rs b/crates/inference-engine/tests/text_generation_tests.rs similarity index 93% rename from crates/local_inference_engine/tests/text_generation_tests.rs rename to crates/inference-engine/tests/text_generation_tests.rs index 7fb9cf9..5927e4c 100644 --- a/crates/local_inference_engine/tests/text_generation_tests.rs +++ b/crates/inference-engine/tests/text_generation_tests.rs @@ -1,11 +1,8 @@ -use local_inference_engine::text_generation::TextGeneration; -use local_inference_engine::model::{Model, Which}; -use local_inference_engine::token_output_stream::TokenOutputStream; -use tokenizers::Tokenizer; -use candle_core::{DType, Device, Tensor}; -use candle_transformers::generation::LogitsProcessor; use anyhow::Result; -use std::sync::Arc; +use candle_transformers::generation::LogitsProcessor; +use inference_engine::model::Which; +use inference_engine::token_output_stream::TokenOutputStream; +use tokenizers::Tokenizer; #[cfg(test)] mod tests { diff --git a/crates/local_inference_engine/tests/token_output_stream_tests.rs b/crates/inference-engine/tests/token_output_stream_tests.rs similarity index 98% rename from crates/local_inference_engine/tests/token_output_stream_tests.rs rename to crates/inference-engine/tests/token_output_stream_tests.rs index 5468ad5..7b842b7 100644 --- a/crates/local_inference_engine/tests/token_output_stream_tests.rs +++ b/crates/inference-engine/tests/token_output_stream_tests.rs @@ -1,4 +1,4 @@ -use local_inference_engine::token_output_stream::TokenOutputStream; +use inference_engine::token_output_stream::TokenOutputStream; use tokenizers::Tokenizer; use std::path::PathBuf; use anyhow::Result; diff --git a/package.json b/package.json index 5cc7041..7aa134e 100644 --- a/package.json +++ b/package.json @@ -7,6 +7,8 @@ "private": true, "scripts": { "clean": "rm -rf .genaiscript && rm 
-rf dist && rm -rf node_modules && rm -rf open-web-agent-rs && rm -rf target && rm -rf packages/genaiscript-rust-shim/dist", + "build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build -p agent-server)", + "mcp-inspector": "bunx @modelcontextprotocol/inspector", "dev": "bun i && ./killport.js 3006 && bun run build && cargo watch -x 'run -p agent-server'", "start": "docker compose up --build", "ai:search": "genaiscript run packages/genaiscript/genaisrc/web-search.genai.mts --vars USER_INPUT='who won the 2024 election?'", @@ -16,8 +18,6 @@ "ai:url:scrape": "npx genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'", "prod:logs": "fly logs", -        "test-http": "test/test-search.ts", -        "mcp-inspector": "bunx @modelcontextprotocol/inspector", -        "build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build -p agent-server)" +        "test-http": "test/test-search.ts" }, "dependencies": { "@modelcontextprotocol/inspector": "^0.14.0"