Mirror of https://github.com/geoffsee/predict-otron-9001.git, synced 2025-09-08 22:46:44 +00:00
Integrate create_inference_router from inference-engine into predict-otron-9000, simplify server routing, and update dependencies to unify versions.
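As a usage note, the sketch below shows how the newly exported create_inference_router could be mounted inside the predict-otron-9000 server binary. This is a hypothetical composition, not code from the commit: the /health route, the main() wiring, and the inference_engine import path are assumptions; only create_inference_router, get_server_config, and init_tracing come from the diff that follows.

// Hypothetical sketch of the integration in the predict-otron-9000 binary.
// Only `create_inference_router`, `get_server_config`, and `init_tracing`
// come from this commit; everything else is illustrative.
use axum::{routing::get, Router};
use inference_engine::{create_inference_router, get_server_config, init_tracing};

#[tokio::main]
async fn main() {
    init_tracing();

    // Compose the top-level router: local routes plus the inference routes.
    let app: Router = Router::new()
        .route("/health", get(|| async { "ok" }))
        .merge(create_inference_router());

    let (_host, _port, addr) = get_server_config();
    let listener = tokio::net::TcpListener::bind(&addr)
        .await
        .expect("failed to bind server address");
    axum::serve(listener, app).await.expect("server error");
}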
@@ -34,10 +34,10 @@ anyhow = "1.0.98"
-clap= { version = "4.2.4", features = ["derive"] }
-tracing = "0.1.37"
-tracing-chrome = "0.7.1"
-tracing-subscriber = "0.3.7"
-axum = { version = "0.7.4", features = ["json"] }
-tower = "0.4.13"
-tower-http = { version = "0.5.1", features = ["cors"] }
+tracing-subscriber = { version = "0.3.7", features = ["env-filter"] }
+axum = { version = "0.8.4", features = ["json"] }
+tower = "0.5.2"
+tower-http = { version = "0.6.6", features = ["cors"] }
+tokio = { version = "1.43.0", features = ["full"] }
+either = { version = "1.9.0", features = ["serde"] }
+utoipa = { version = "4.2.0", features = ["axum_extras"] }
@@ -10,4 +10,61 @@ pub mod server;
 // Re-export key components for easier access
 pub use model::{Model, Which};
 pub use text_generation::TextGeneration;
 pub use token_output_stream::TokenOutputStream;
+pub use server::{AppState, create_router};
+
+use axum::{Json, http::StatusCode, routing::post, Router};
+use serde_json;
+use std::env;
+use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
+
+/// Server configuration constants
+pub const DEFAULT_SERVER_HOST: &str = "0.0.0.0";
+pub const DEFAULT_SERVER_PORT: &str = "8080";
+
+/// Get server configuration from environment variables with defaults
+pub fn get_server_config() -> (String, String, String) {
+    let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| DEFAULT_SERVER_HOST.to_string());
+    let server_port = env::var("SERVER_PORT").unwrap_or_else(|_| DEFAULT_SERVER_PORT.to_string());
+    let server_address = format!("{}:{}", server_host, server_port);
+    (server_host, server_port, server_address)
+}
+
+/// Initialize tracing with configurable log levels
+pub fn init_tracing() {
+    tracing_subscriber::registry()
+        .with(
+            tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
+                format!(
+                    "{}=debug,tower_http=debug,axum::rejection=trace",
+                    env!("CARGO_CRATE_NAME")
+                )
+                .into()
+            }),
+        )
+        .with(tracing_subscriber::fmt::layer())
+        .init();
+}
+
+/// Create a simplified inference router that returns appropriate error messages
+/// indicating that full model loading is required for production use
+pub fn create_inference_router() -> Router {
+    Router::new()
+        .route("/v1/chat/completions", post(simplified_chat_completions))
+}
+
+async fn simplified_chat_completions(
+    axum::Json(request): axum::Json<serde_json::Value>,
+) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
+    // Return the same error message as the actual server implementation
+    // to indicate that full inference functionality requires proper model initialization
+    Err((
+        StatusCode::BAD_REQUEST,
+        Json(serde_json::json!({
+            "error": {
+                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
+                "type": "unsupported_api"
+            }
+        })),
+    ))
+}
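For completeness, a minimal test sketch (not part of the commit) that exercises the simplified route and asserts the documented 400 response. It assumes tower's "util" feature is enabled for ServiceExt::oneshot and that the crate is imported as inference_engine; the test name and request body are illustrative.

// Hypothetical test: the simplified /v1/chat/completions handler should
// reject requests with 400 Bad Request until full model loading is wired in.
#[cfg(test)]
mod tests {
    use axum::body::Body;
    use axum::http::{Request, StatusCode};
    use tower::ServiceExt; // provides `oneshot`; requires tower's "util" feature

    #[tokio::test]
    async fn chat_completions_is_not_yet_supported() {
        let app = inference_engine::create_inference_router();

        let request = Request::builder()
            .method("POST")
            .uri("/v1/chat/completions")
            .header("content-type", "application/json")
            .body(Body::from(r#"{"model":"gemma","messages":[]}"#))
            .unwrap();

        let response = app.oneshot(request).await.unwrap();
        assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    }
}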