diff --git a/crates/embeddings-engine/src/lib.rs b/crates/embeddings-engine/src/lib.rs
index 8a37f42..023413a 100644
--- a/crates/embeddings-engine/src/lib.rs
+++ b/crates/embeddings-engine/src/lib.rs
@@ -1,10 +1,5 @@
 use async_openai::types::{CreateEmbeddingRequest, EmbeddingInput};
-use axum::{
-    Json, Router,
-    http::StatusCode,
-    response::Json as ResponseJson,
-    routing::post,
-};
+use axum::{Json, Router, http::StatusCode, response::Json as ResponseJson, routing::post};
 use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
 use once_cell::sync::Lazy;
 use serde::Serialize;
diff --git a/crates/inference-engine/src/lib.rs b/crates/inference-engine/src/lib.rs
index b1643b6..4f709d6 100644
--- a/crates/inference-engine/src/lib.rs
+++ b/crates/inference-engine/src/lib.rs
@@ -8,7 +8,7 @@ pub mod server;
 // Re-export key components for easier access
 pub use inference::ModelInference;
 pub use model::{Model, Which};
-pub use server::{create_router, AppState};
+pub use server::{AppState, create_router};
 
 use std::env;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
diff --git a/crates/inference-engine/src/main.rs b/crates/inference-engine/src/main.rs
index 690c9fc..e6b0338 100644
--- a/crates/inference-engine/src/main.rs
+++ b/crates/inference-engine/src/main.rs
@@ -1,23 +1,26 @@
-use inference_engine::{create_router, init_tracing, get_server_config, AppState};
+use inference_engine::{AppState, create_router, get_server_config, init_tracing};
 use tokio::net::TcpListener;
 use tracing::info;
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     init_tracing();
-    
+
     let app_state = AppState::default();
     let app = create_router(app_state);
-    
+
     let (server_host, server_port, server_address) = get_server_config();
     let listener = TcpListener::bind(&server_address).await?;
-    
-    info!("Inference Engine server starting on http://{}", server_address);
+
+    info!(
+        "Inference Engine server starting on http://{}",
+        server_address
+    );
     info!("Available endpoints:");
     info!(" POST /v1/chat/completions - OpenAI-compatible chat completions");
     info!(" GET /v1/models - List available models");
-    
+
     axum::serve(listener, app).await?;
-    
+
     Ok(())
-}
\ No newline at end of file
+}
diff --git a/crates/inference-engine/src/server.rs b/crates/inference-engine/src/server.rs
index a7c0f77..cd3095c 100644
--- a/crates/inference-engine/src/server.rs
+++ b/crates/inference-engine/src/server.rs
@@ -1,28 +1,28 @@
 use axum::{
+    Json, Router,
     extract::State,
     http::StatusCode,
-    response::{sse::Event, sse::Sse, IntoResponse},
+    response::{IntoResponse, sse::Event, sse::Sse},
     routing::{get, post},
-    Json, Router,
 };
 use futures_util::stream::{self, Stream};
 use std::convert::Infallible;
 use std::str::FromStr;
 use std::sync::Arc;
-use tokio::sync::{mpsc, Mutex};
+use tokio::sync::{Mutex, mpsc};
 use tokio_stream::wrappers::UnboundedReceiverStream;
 use tower_http::cors::{Any, CorsLayer};
 use uuid::Uuid;
 
+use crate::Which;
 use crate::openai_types::{
     ChatCompletionChoice, ChatCompletionChunk, ChatCompletionChunkChoice, ChatCompletionRequest,
     ChatCompletionResponse, Delta, Message, MessageContent, Model, ModelListResponse, Usage,
 };
-use crate::Which;
 use either::Either;
 use embeddings_engine::models_list;
-use gemma_runner::{run_gemma_api, GemmaInferenceConfig, WhichModel};
-use llama_runner::{run_llama_inference, LlamaInferenceConfig};
+use gemma_runner::{GemmaInferenceConfig, WhichModel, run_gemma_api};
+use llama_runner::{LlamaInferenceConfig, run_llama_inference};
 use serde_json::Value;
 // -------------------------
 // Shared app state
diff --git a/crates/predict-otron-9000/src/config.rs b/crates/predict-otron-9000/src/config.rs
index bc41fd7..83aa87c 100644
--- a/crates/predict-otron-9000/src/config.rs
+++ b/crates/predict-otron-9000/src/config.rs
@@ -45,8 +45,6 @@ pub struct Services {
     pub embeddings_url: Option<String>,
 }
 
-
-
 impl Default for ServerConfig {
     fn default() -> Self {
         Self {
diff --git a/crates/predict-otron-9000/src/main.rs b/crates/predict-otron-9000/src/main.rs
index b263190..ab27e5f 100644
--- a/crates/predict-otron-9000/src/main.rs
+++ b/crates/predict-otron-9000/src/main.rs
@@ -14,12 +14,12 @@ use std::env;
 #[cfg(feature = "ui")]
 use axum::http::StatusCode as AxumStatusCode;
 #[cfg(feature = "ui")]
+use axum::http::Uri;
+#[cfg(feature = "ui")]
 use axum::http::header;
 #[cfg(feature = "ui")]
 use axum::response::IntoResponse;
 #[cfg(feature = "ui")]
-use axum::http::Uri;
-#[cfg(feature = "ui")]
 use mime_guess::from_path;
 #[cfg(feature = "ui")]
 use rust_embed::Embed;
@@ -28,7 +28,6 @@ use tower_http::cors::{Any, CorsLayer};
 use tower_http::trace::TraceLayer;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
-
 #[cfg(feature = "ui")]
 #[derive(Embed)]
 #[folder = "../../target/site"]
diff --git a/integration/cli/build.rs b/integration/cli/build.rs
index d167bc9..4ba2ebb 100644
--- a/integration/cli/build.rs
+++ b/integration/cli/build.rs
@@ -24,8 +24,7 @@ fn run_build() -> io::Result<()> {
     let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
     let output_path = out_dir.join("client-cli");
 
-    let bun_tgt = BunTarget::from_cargo_env()
-        .map_err(|e| io::Error::other(e.to_string()))?;
+    let bun_tgt = BunTarget::from_cargo_env().map_err(|e| io::Error::other(e.to_string()))?;
 
     // Optional: warn if using a Bun target that’s marked unsupported in your chart
     if matches!(bun_tgt, BunTarget::WindowsArm64) {
@@ -54,9 +53,9 @@
 
     if !install_status.success() {
         let code = install_status.code().unwrap_or(1);
-        return Err(io::Error::other(
-            format!("bun install failed with status {code}"),
-        ));
+        return Err(io::Error::other(format!(
+            "bun install failed with status {code}"
+        )));
     }
 
     let _target = env::var("TARGET").unwrap();
diff --git a/integration/cli/src/main.rs b/integration/cli/src/main.rs
index 2113012..025f2b4 100644
--- a/integration/cli/src/main.rs
+++ b/integration/cli/src/main.rs
@@ -25,7 +25,7 @@ fn main() -> io::Result<()> {
     // Run it
     let status = Command::new(&tmp).arg("--version").status()?;
     if !status.success() {
-        return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
+        return Err(io::Error::other("client-cli failed"));
     }
 
     Ok(())
diff --git a/integration/gemma-runner/src/gemma_api.rs b/integration/gemma-runner/src/gemma_api.rs
index d45cb2b..8695284 100644
--- a/integration/gemma-runner/src/gemma_api.rs
+++ b/integration/gemma-runner/src/gemma_api.rs
@@ -205,7 +205,7 @@ impl TextGeneration {
 
         let start_gen = std::time::Instant::now();
 
-        for (_generated_tokens, index) in (0..sample_len).enumerate() {
+        for index in 0..sample_len {
             let context_size = if index > 0 { 1 } else { tokens.len() };
             let start_pos = tokens.len().saturating_sub(context_size);
             let ctxt = &tokens[start_pos..];
diff --git a/integration/gemma-runner/src/main.rs b/integration/gemma-runner/src/main.rs
index 8205b49..7d56eea 100644
--- a/integration/gemma-runner/src/main.rs
+++ b/integration/gemma-runner/src/main.rs
@@ -6,10 +6,8 @@ mod gemma_api;
 mod gemma_cli;
 
 use anyhow::Error;
-use clap::{Parser, ValueEnum};
 
 use crate::gemma_cli::run_cli;
-use std::io::Write;
 
 /// just a placeholder, not used for anything
 fn main() -> std::result::Result<(), Error> {
diff --git a/integration/helm-chart-tool/src/main.rs b/integration/helm-chart-tool/src/main.rs
index b3623bd..064bab8 100644
--- a/integration/helm-chart-tool/src/main.rs
+++ b/integration/helm-chart-tool/src/main.rs
@@ -28,7 +28,6 @@ struct KubeMetadata {
     port: u16,
 }
 
-
 #[derive(Debug, Clone)]
 struct ServiceInfo {
     name: String,
diff --git a/integration/llama-runner/src/main.rs b/integration/llama-runner/src/main.rs
index 551e1bd..fc6daaa 100644
--- a/integration/llama-runner/src/main.rs
+++ b/integration/llama-runner/src/main.rs
@@ -6,9 +6,6 @@ mod llama_api;
 mod llama_cli;
 
 use anyhow::Result;
-use clap::{Parser, ValueEnum};
-
-use std::io::Write;
 
 use crate::llama_cli::run_cli;
 
diff --git a/integration/utils/src/lib.rs b/integration/utils/src/lib.rs
index b39cfd6..64cfaa3 100644
--- a/integration/utils/src/lib.rs
+++ b/integration/utils/src/lib.rs
@@ -124,10 +124,7 @@ pub fn hub_load_safetensors(
     }
     let safetensors_files = safetensors_files
         .iter()
-        .map(|v| {
-            repo.get(v)
-                .map_err(|e| std::io::Error::other(e))
-        })
+        .map(|v| repo.get(v).map_err(std::io::Error::other))
        .collect::<Result<Vec<_>, std::io::Error>>()?;
     Ok(safetensors_files)
 }