Integrate create_inference_router from inference-engine into predict-otron-9000, simplify server routing, and update dependencies to unify versions.

geoffsee
2025-08-16 19:53:21 -04:00
parent 411ad78026
commit b8ba994783
4 changed files with 86 additions and 168 deletions


@@ -34,10 +34,10 @@ anyhow = "1.0.98"
 clap = { version = "4.2.4", features = ["derive"] }
 tracing = "0.1.37"
 tracing-chrome = "0.7.1"
-tracing-subscriber = "0.3.7"
-axum = { version = "0.7.4", features = ["json"] }
-tower = "0.4.13"
-tower-http = { version = "0.5.1", features = ["cors"] }
+tracing-subscriber = { version = "0.3.7", features = ["env-filter"] }
+axum = { version = "0.8.4", features = ["json"] }
+tower = "0.5.2"
+tower-http = { version = "0.6.6", features = ["cors"] }
 tokio = { version = "1.43.0", features = ["full"] }
 either = { version = "1.9.0", features = ["serde"] }
 utoipa = { version = "4.2.0", features = ["axum_extras"] }
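
Two of these bumps are more than version churn. tracing-subscriber gains the env-filter feature, which the new init_tracing helper in the next file relies on for EnvFilter. And axum 0.7 -> 0.8 is a breaking upgrade: path captures moved from the colon syntax (/:id) to braces ({id}), so any parameterized routes in the workspace have to be updated along with the version bump. A minimal sketch of that route change, using a hypothetical /v1/models/{id} endpoint that is not part of this commit:

use axum::{Router, extract::Path, routing::get};

// Hypothetical handler, for illustration only
async fn get_model(Path(id): Path<String>) -> String {
    format!("model: {id}")
}

fn build_router() -> Router {
    // axum 0.7 wrote this capture as "/v1/models/:id";
    // axum 0.8 requires the brace form.
    Router::new().route("/v1/models/{id}", get(get_model))
}

The /v1/chat/completions route added below has no path captures, so it compiles unchanged under 0.8.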


@@ -10,4 +10,61 @@ pub mod server;
 // Re-export key components for easier access
 pub use model::{Model, Which};
 pub use text_generation::TextGeneration;
-pub use token_output_stream::TokenOutputStream;
+pub use token_output_stream::TokenOutputStream;
+pub use server::{AppState, create_router};
+
+use axum::{Json, Router, http::StatusCode, routing::post};
+use serde_json;
+use std::env;
+use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
+
+/// Server configuration constants
+pub const DEFAULT_SERVER_HOST: &str = "0.0.0.0";
+pub const DEFAULT_SERVER_PORT: &str = "8080";
+
+/// Get server configuration from environment variables with defaults
+pub fn get_server_config() -> (String, String, String) {
+    let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| DEFAULT_SERVER_HOST.to_string());
+    let server_port = env::var("SERVER_PORT").unwrap_or_else(|_| DEFAULT_SERVER_PORT.to_string());
+    let server_address = format!("{}:{}", server_host, server_port);
+    (server_host, server_port, server_address)
+}
+
+/// Initialize tracing with configurable log levels
+pub fn init_tracing() {
+    tracing_subscriber::registry()
+        .with(
+            tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
+                format!(
+                    "{}=debug,tower_http=debug,axum::rejection=trace",
+                    env!("CARGO_CRATE_NAME")
+                )
+                .into()
+            }),
+        )
+        .with(tracing_subscriber::fmt::layer())
+        .init();
+}
+
+/// Create a simplified inference router that returns appropriate error messages
+/// indicating that full model loading is required for production use
+pub fn create_inference_router() -> Router {
+    Router::new()
+        .route("/v1/chat/completions", post(simplified_chat_completions))
+}
+
+async fn simplified_chat_completions(
+    axum::Json(request): axum::Json<serde_json::Value>,
+) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
+    // Return the same error message as the actual server implementation
+    // to indicate that full inference functionality requires proper model initialization
+    Err((
+        StatusCode::BAD_REQUEST,
+        Json(serde_json::json!({
+            "error": {
+                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
+                "type": "unsupported_api"
+            }
+        })),
+    ))
+}
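
With these helpers exported from the library crate, predict-otron-9000 can drop its own routing boilerplate and compose the inference routes directly. A minimal sketch of the consuming side; the surrounding main is an illustration of the wiring, not code from this commit:

use axum::Router;
use inference_engine::{create_inference_router, get_server_config, init_tracing};

#[tokio::main]
async fn main() {
    // Shared tracing setup from the library crate
    init_tracing();

    // Merge the simplified inference routes into the top-level router
    let app: Router = Router::new().merge(create_inference_router());

    // Bind to the SERVER_HOST/SERVER_PORT-derived address
    let (_host, _port, addr) = get_server_config();
    let listener = tokio::net::TcpListener::bind(addr).await.unwrap();
    axum::serve(listener, app).await.unwrap();
}

Until real model loading is wired in, any POST to /v1/chat/completions served through this router returns the 400 unsupported_api payload above, steering callers to the CLI mode.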