Supports small Llama and Gemma models

Refactor inference

Dedicated crates for Llama and Gemma inference, not yet integrated.
This commit is contained in:
geoffsee
2025-08-29 18:15:29 -04:00
parent d06b16bb12
commit 315ef17605
26 changed files with 2136 additions and 1402 deletions

View File

@@ -3,9 +3,16 @@ name = "inference-engine"
version = "0.1.0"
edition = "2021"
[[bin]]
name="cli"
path = "src/cli_main.rs"
name="gemma_inference"
path = "src/gemma_inference.rs"
required-features = ["bin"]
[[bin]]
name="llama_inference"
path = "src/llama_inference.rs"
required-features = ["bin"]
[dependencies]
@@ -50,6 +57,8 @@ utoipa = { version = "4.2.0", features = ["axum_extras"] }
uuid = { version = "1.7.0", features = ["v4"] }
reborrow = "0.5.5"
futures-util = "0.3.31"
gemma-runner = { path = "../gemma-runner" }
llama-runner = { path = "../llama-runner" }
# --- Add this section for conditional compilation ---
[target.'cfg(target_os = "macos")'.dependencies]
@@ -83,6 +92,9 @@ tokio = "1.43.0"
anyhow = { version = "1", features = ["backtrace"] }
bindgen_cuda = { version = "0.1.1", optional = true }
[features]
bin = []
[package.metadata.compose]