Mirror of https://github.com/geoffsee/predict-otron-9001.git, synced 2025-09-08 22:46:44 +00:00
predict-otron-9000 serves a leptos SSR frontend
@@ -1,22 +1,21 @@
-mod middleware;
+mod config;
+mod middleware;
 mod proxy;
 
-use axum::{
-    Router,
-    serve,
-};
-use std::env;
-use axum::response::IntoResponse;
-use axum::routing::get;
-use tokio::net::TcpListener;
-use tower_http::trace::TraceLayer;
-use tower_http::cors::{Any, CorsLayer};
-use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
-use inference_engine::AppState;
-use middleware::{MetricsStore, MetricsLoggerFuture, MetricsLayer};
+use axum::{Router, http::Uri, response::Html, serve};
+use config::ServerConfig;
+use inference_engine::AppState;
+use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
+use proxy::create_proxy_router;
+
+use rust_embed::Embed;
+use std::env;
+use tokio::net::TcpListener;
+use tower_http::classify::ServerErrorsFailureClass::StatusCode;
+use tower_http::cors::{Any, CorsLayer};
+use tower_http::trace::TraceLayer;
+use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
 #[tokio::main]
 async fn main() {
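The first hunk introduces a `config` module and brings `ServerConfig` into scope. The module body is not part of this commit view, but the calls made on it later in the diff (`is_high_availability()`, `inference_url()`, `embeddings_url()`) imply a small, env-driven settings type. A minimal sketch under that assumption follows; the field names, env-var keys (`SERVER_MODE`, `INFERENCE_SERVICE_URL`, `EMBEDDINGS_SERVICE_URL`), and defaults are illustrative, not taken from the repository.

```rust
use std::env;

/// Sketch of the settings type implied by the diff. Field names, env-var
/// keys, and defaults are assumptions for illustration only.
#[derive(Clone, Debug)]
pub struct ServerConfig {
    mode: String,
    inference_url: String,
    embeddings_url: String,
}

impl ServerConfig {
    /// Hypothetical constructor reading configuration from the environment.
    pub fn from_env() -> Self {
        Self {
            mode: env::var("SERVER_MODE").unwrap_or_else(|_| "standalone".to_string()),
            inference_url: env::var("INFERENCE_SERVICE_URL")
                .unwrap_or_else(|_| "http://localhost:8080".to_string()),
            embeddings_url: env::var("EMBEDDINGS_SERVICE_URL")
                .unwrap_or_else(|_| "http://localhost:8081".to_string()),
        }
    }

    /// True when the server should proxy to external services.
    pub fn is_high_availability(&self) -> bool {
        self.mode.eq_ignore_ascii_case("highavailability")
    }

    pub fn inference_url(&self) -> &str {
        &self.inference_url
    }

    pub fn embeddings_url(&self) -> &str {
        &self.embeddings_url
    }
}
```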
@@ -34,13 +33,12 @@ async fn main() {
         .with(tracing_subscriber::fmt::layer())
         .init();
 
     // Initialize metrics store for performance tracking
     let metrics_store = MetricsStore::new();
 
     // Create a metrics logger that will periodically log metrics (every 60 seconds)
     let metrics_logger = MetricsLoggerFuture::new(metrics_store.clone(), 60);
 
     // Spawn the metrics logger in a background task
     tokio::spawn(metrics_logger);
 
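This hunk only reflows existing code: a `MetricsStore` is created, a `MetricsLoggerFuture` wrapping it is built with a 60-second period, and the future is spawned onto the Tokio runtime. The `middleware` module itself is not shown; the sketch below illustrates what such a periodic logger amounts to using `tokio::time::interval`, with a stand-in store type. It is not the crate's actual implementation.

```rust
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;

/// Stand-in for the diff's MetricsStore; the real type lives in the
/// crate's `middleware` module and likely tracks more than a counter.
#[derive(Clone, Default)]
pub struct MetricsStore {
    request_count: Arc<Mutex<u64>>,
}

impl MetricsStore {
    pub async fn record_request(&self) {
        *self.request_count.lock().await += 1;
    }

    pub async fn request_count(&self) -> u64 {
        *self.request_count.lock().await
    }
}

/// Roughly what spawning a periodic metrics logger amounts to: wake every
/// `period_secs` seconds and emit a summary through `tracing`.
pub async fn metrics_logger(store: MetricsStore, period_secs: u64) {
    let mut ticker = tokio::time::interval(Duration::from_secs(period_secs));
    loop {
        ticker.tick().await;
        tracing::info!(
            "metrics summary: {} requests observed so far",
            store.request_count().await
        );
    }
}
```

Spawning it mirrors the shape in the diff: `tokio::spawn(metrics_logger(metrics_store.clone(), 60));`.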
@@ -55,19 +53,22 @@ async fn main() {
     let service_router = if server_config.clone().is_high_availability() {
         tracing::info!("Running in HighAvailability mode - proxying to external services");
         tracing::info!(" Inference service URL: {}", server_config.inference_url());
-        tracing::info!(" Embeddings service URL: {}", server_config.embeddings_url());
+        tracing::info!(
+            " Embeddings service URL: {}",
+            server_config.embeddings_url()
+        );
 
         // Use proxy router that forwards requests to external services
         create_proxy_router(server_config.clone())
     } else {
-        tracing::info!("Running in Local mode - using embedded services");
+        tracing::info!("Running in Standalone mode - using embedded services");
 
         // Create unified router by merging embeddings and inference routers (existing behavior)
         let embeddings_router = embeddings_engine::create_embeddings_router();
 
         // Create AppState with correct model configuration
-        use inference_engine::server::{PipelineArgs, build_pipeline};
         use inference_engine::Which;
+        use inference_engine::server::{PipelineArgs, build_pipeline};
         let mut pipeline_args = PipelineArgs::default();
         pipeline_args.model_id = "google/gemma-3-1b-it".to_string();
         pipeline_args.which = Which::InstructV3_1B;
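In HighAvailability mode the server delegates to `create_proxy_router`, which lives in the new `proxy` module and is not shown here. Conceptually it forwards OpenAI-style requests to the configured inference and embeddings services. The sketch below shows that forwarding idea for the chat-completions endpoint using axum and `reqwest`; the handler name, the use of `reqwest`, and the simplified error handling are assumptions, and the embeddings route would follow the same pattern against `embeddings_url()`.

```rust
use axum::{
    Router,
    body::Bytes,
    extract::State,
    http::StatusCode,
    response::IntoResponse,
    routing::post,
};

/// Illustrative proxy state: the upstream base URL plus a shared HTTP client.
#[derive(Clone)]
struct ProxyState {
    inference_url: String,
    client: reqwest::Client,
}

/// Forward a chat-completions request body to the inference service and
/// relay the upstream status and body. Error mapping is deliberately simple.
async fn proxy_chat(State(state): State<ProxyState>, body: Bytes) -> impl IntoResponse {
    let url = format!("{}/v1/chat/completions", state.inference_url);
    match state.client.post(&url).body(body).send().await {
        Ok(resp) => {
            let status = StatusCode::from_u16(resp.status().as_u16())
                .unwrap_or(StatusCode::BAD_GATEWAY);
            let bytes = resp.bytes().await.unwrap_or_default();
            (status, bytes).into_response()
        }
        Err(_) => StatusCode::BAD_GATEWAY.into_response(),
    }
}

/// Sketch of a proxy router; the real `create_proxy_router(ServerConfig)`
/// presumably wires both the inference and embeddings endpoints.
fn proxy_router_sketch(inference_url: String) -> Router {
    let state = ProxyState {
        inference_url,
        client: reqwest::Client::new(),
    };
    Router::new()
        .route("/v1/chat/completions", post(proxy_chat))
        .with_state(state)
}
```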
@@ -98,39 +99,41 @@ async fn main() {
     // Create metrics layer
     let metrics_layer = MetricsLayer::new(metrics_store);
 
+    // Create the leptos router for the web frontend
+    let leptos_router = leptos_app::create_leptos_router();
+
     // Merge the service router with base routes and add middleware layers
     let app = Router::new()
-        .route("/", get(|| async { "API ready. This can serve the Leptos web app, but it doesn't." }))
         .route("/health", get(|| async { "ok" }))
         .merge(service_router)
-        .layer(metrics_layer) // Add metrics tracking
+        .merge(leptos_router) // Add leptos web frontend routes
+        .layer(metrics_layer) // Add metrics tracking
         .layer(cors)
         .layer(TraceLayer::new_for_http());
 
     // Server configuration
-    let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| {
-        String::from(default_host)
-    });
+    let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| String::from(default_host));
 
-    let server_port = env::var("SERVER_PORT").map(|v| v.parse::<u16>().unwrap_or(default_port)).unwrap_or_else(|_| {
-        default_port
-    });
+    let server_port = env::var("SERVER_PORT")
+        .map(|v| v.parse::<u16>().unwrap_or(default_port))
+        .unwrap_or_else(|_| default_port);
 
     let server_address = format!("{}:{}", server_host, server_port);
 
-
     let listener = TcpListener::bind(&server_address).await.unwrap();
-    tracing::info!("Unified predict-otron-9000 server listening on {}", listener.local_addr().unwrap());
+    tracing::info!(
+        "Unified predict-otron-9000 server listening on {}",
+        listener.local_addr().unwrap()
+    );
     tracing::info!("Performance metrics tracking enabled - summary logs every 60 seconds");
     tracing::info!("Available endpoints:");
-    tracing::info!(" GET / - Root endpoint from embeddings-engine");
-    tracing::info!(" POST /v1/embeddings - Text embeddings");
-    tracing::info!(" POST /v1/chat/completions - Chat completions");
+    tracing::info!(" GET / - Leptos chat web application");
+    tracing::info!(" GET /health - Health check");
+    tracing::info!(" POST /v1/embeddings - Text embeddings API");
+    tracing::info!(" POST /v1/chat/completions - Chat completions API");
 
     serve(listener, app).await.unwrap();
 }
 
 
 
-// Chat completions handler that properly uses the inference server crate's error handling
-// This function is no longer needed as we're using the inference_engine router directly
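The new imports in the first hunk (`rust_embed::Embed`, `axum::http::Uri`, `axum::response::Html`) and the removal of the placeholder "API ready…" root route suggest the Leptos frontend's static assets are embedded into the binary and served by `leptos_app::create_leptos_router()`. That router is not part of this commit view. The following is only a generic sketch of serving embedded assets from an axum fallback handler; the `dist/` folder path and the `mime_guess` dependency are assumptions, not taken from the repository.

```rust
use axum::{
    Router,
    http::{StatusCode, Uri, header},
    response::IntoResponse,
};
use rust_embed::Embed;

/// Hypothetical embedded asset bundle; the folder path is an assumption.
#[derive(Embed)]
#[folder = "dist/"]
struct Assets;

/// Serve the embedded file matching the request path, or 404.
async fn static_handler(uri: Uri) -> impl IntoResponse {
    let path = uri.path().trim_start_matches('/');
    match Assets::get(path) {
        Some(file) => {
            let mime = mime_guess::from_path(path).first_or_octet_stream();
            (
                [(header::CONTENT_TYPE, mime.as_ref().to_string())],
                file.data.into_owned(),
            )
                .into_response()
        }
        None => (StatusCode::NOT_FOUND, "not found").into_response(),
    }
}

/// Route unknown paths to the embedded assets, e.g. merged after the API routes.
fn asset_router() -> Router {
    Router::new().fallback(static_handler)
}
```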