mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
add root dockerfile that contains binaries for all services
This commit is contained in:
@@ -29,7 +29,7 @@ inference-engine = { path = "../inference-engine" }
|
||||
|
||||
# Dependencies for leptos web app
|
||||
#leptos-app = { path = "../leptos-app", features = ["ssr"] }
|
||||
chat-ui = { path = "../chat-ui", features = ["ssr", "hydrate"], optional = false }
|
||||
chat-ui = { path = "../chat-ui", features = ["ssr", "hydrate"], optional = true }
|
||||
|
||||
mime_guess = "2.0.5"
|
||||
log = "0.4.27"
|
||||
@@ -39,15 +39,20 @@ log = "0.4.27"
|
||||
name = "predict-otron-9000"
|
||||
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||
port = 8080
|
||||
|
||||
cmd = ["./bin/predict-otron-9000"]
|
||||
|
||||
# generates kubernetes manifests
|
||||
[package.metadata.kube]
|
||||
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||
replicas = 1
|
||||
port = 8080
|
||||
cmd = ["./bin/predict-otron-9000"]
|
||||
# SERVER_CONFIG Example: {\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}
|
||||
# You can generate the escaped JSON value with Node.js instead of escaping it by hand:
|
||||
# const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
|
||||
# console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
|
||||
env = { SERVER_CONFIG = "<your-json-value-here>" }
|
||||
|
||||
[features]
|
||||
default = ["ui"]
|
||||
ui = ["dep:chat-ui"]
|
||||
|
@@ -1,89 +0,0 @@
|
||||
# ---- Build stage ----
|
||||
FROM rust:1-slim-bullseye AS builder
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
# Install base build dependencies (wget is used to download the CUDA keyring in the next step, which installs the CUDA toolkit itself)
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
pkg-config \
|
||||
libssl-dev \
|
||||
build-essential \
|
||||
wget \
|
||||
gnupg2 \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install CUDA toolkit (required for inference-engine dependency)
|
||||
# This is a minimal CUDA installation for building
|
||||
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
|
||||
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-minimal-build-11-8 \
|
||||
libcublas-dev-11-8 \
|
||||
libcurand-dev-11-8 \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& rm cuda-keyring_1.0-1_all.deb
|
||||
|
||||
# Set CUDA environment variables
|
||||
ENV CUDA_HOME=/usr/local/cuda
|
||||
ENV PATH=${CUDA_HOME}/bin:${PATH}
|
||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
||||
|
||||
# Copy the entire workspace to get access to all crates (needed for local dependencies)
|
||||
COPY . ./
|
||||
|
||||
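# Pre-build dependencies against dummy source files for all crates so the compiled dependencies can be reused
# NOTE(review): the whole workspace is already copied by the COPY above, so any source change likely invalidates this layer as well - confirm the cache is effective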
|
||||
RUN rm -rf crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src
|
||||
RUN mkdir -p crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src && \
|
||||
echo "fn main() {}" > crates/predict-otron-9000/src/main.rs && \
|
||||
echo "fn main() {}" > crates/inference-engine/src/main.rs && \
|
||||
echo "fn main() {}" > crates/inference-engine/src/cli_main.rs && \
|
||||
echo "// lib" > crates/inference-engine/src/lib.rs && \
|
||||
echo "fn main() {}" > crates/embeddings-engine/src/main.rs && \
|
||||
echo "// lib" > crates/embeddings-engine/src/lib.rs && \
|
||||
cargo build --release --bin predict-otron-9000 --package predict-otron-9000
|
||||
|
||||
# Remove dummy sources and copy real sources
|
||||
RUN rm -rf crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src
|
||||
COPY . .
|
||||
|
||||
# Build the actual binary
|
||||
RUN cargo build --release --bin predict-otron-9000 --package predict-otron-9000
|
||||
|
||||
# ---- Runtime stage ----
|
||||
FROM debian:bullseye-slim
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libssl1.1 \
|
||||
ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install CUDA runtime libraries (required for inference-engine dependency)
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
wget \
|
||||
gnupg2 \
|
||||
&& wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb \
|
||||
&& dpkg -i cuda-keyring_1.0-1_all.deb \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
cuda-cudart-11-8 \
|
||||
libcublas11 \
|
||||
libcurand10 \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& rm cuda-keyring_1.0-1_all.deb \
|
||||
&& apt-get purge -y wget gnupg2
|
||||
|
||||
# Copy binary from builder
|
||||
COPY --from=builder /usr/src/app/target/release/predict-otron-9000 /usr/local/bin/
|
||||
|
||||
# Run as non-root user for safety
|
||||
RUN useradd -m appuser
|
||||
USER appuser
|
||||
|
||||
EXPOSE 8080
|
||||
CMD ["predict-otron-9000"]
|
@@ -4,27 +4,32 @@ mod middleware;
|
||||
mod standalone_mode;
|
||||
|
||||
use crate::standalone_mode::create_standalone_router;
|
||||
use axum::http::StatusCode as AxumStatusCode;
|
||||
use axum::http::header;
|
||||
use axum::response::IntoResponse;
|
||||
use axum::routing::get;
|
||||
use axum::{Router, http::Uri, response::Html, serve};
|
||||
use axum::{Router, serve};
|
||||
use config::ServerConfig;
|
||||
use ha_mode::create_ha_router;
|
||||
use inference_engine::AppState;
|
||||
use log::info;
|
||||
use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
|
||||
use mime_guess::from_path;
|
||||
use rust_embed::Embed;
|
||||
use std::env;
|
||||
use std::path::Component::ParentDir;
|
||||
|
||||
#[cfg(feature = "ui")]
|
||||
use axum::http::StatusCode as AxumStatusCode;
|
||||
#[cfg(feature = "ui")]
|
||||
use axum::http::header;
|
||||
#[cfg(feature = "ui")]
|
||||
use axum::response::IntoResponse;
|
||||
#[cfg(feature = "ui")]
|
||||
use axum::http::Uri;
|
||||
#[cfg(feature = "ui")]
|
||||
use mime_guess::from_path;
|
||||
#[cfg(feature = "ui")]
|
||||
use rust_embed::Embed;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::MakeService;
|
||||
use tower_http::classify::ServerErrorsFailureClass::StatusCode;
|
||||
use tower_http::cors::{Any, CorsLayer};
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||
|
||||
|
||||
#[cfg(feature = "ui")]
|
||||
#[derive(Embed)]
|
||||
#[folder = "../../target/site"]
|
||||
#[include = "*.js"]
|
||||
@@ -33,6 +38,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||
#[include = "*.ico"]
|
||||
struct Asset;
|
||||
|
||||
#[cfg(feature = "ui")]
|
||||
async fn static_handler(uri: Uri) -> axum::response::Response {
|
||||
// Strip the leading `/`
|
||||
let path = uri.path().trim_start_matches('/');
|
||||
@@ -110,17 +116,22 @@ async fn main() {
|
||||
// Create metrics layer
|
||||
let metrics_layer = MetricsLayer::new(metrics_store);
|
||||
|
||||
let leptos_config = chat_ui::app::AppConfig::default();
|
||||
|
||||
// Create the leptos router for the web frontend
|
||||
let leptos_router = chat_ui::app::create_router(leptos_config.config.leptos_options);
|
||||
|
||||
// Merge the service router with base routes and add middleware layers
|
||||
let app = Router::new()
|
||||
.route("/pkg/{*path}", get(static_handler))
|
||||
let mut app = Router::new()
|
||||
.route("/health", get(|| async { "ok" }))
|
||||
.merge(service_router)
|
||||
.merge(leptos_router)
|
||||
.merge(service_router);
|
||||
|
||||
// Add UI routes if the UI feature is enabled
|
||||
#[cfg(feature = "ui")]
|
||||
{
|
||||
let leptos_config = chat_ui::app::AppConfig::default();
|
||||
let leptos_router = chat_ui::app::create_router(leptos_config.config.leptos_options);
|
||||
app = app
|
||||
.route("/pkg/{*path}", get(static_handler))
|
||||
.merge(leptos_router);
|
||||
}
|
||||
|
||||
let app = app
|
||||
.layer(metrics_layer) // Add metrics tracking
|
||||
.layer(cors)
|
||||
.layer(TraceLayer::new_for_http());
|
||||
@@ -141,6 +152,7 @@ async fn main() {
|
||||
);
|
||||
tracing::info!("Performance metrics tracking enabled - summary logs every 60 seconds");
|
||||
tracing::info!("Available endpoints:");
|
||||
#[cfg(feature = "ui")]
|
||||
tracing::info!(" GET / - Leptos chat web application");
|
||||
tracing::info!(" GET /health - Health check");
|
||||
tracing::info!(" POST /v1/models - List Models");
|
||||
|
Reference in New Issue
Block a user