mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
add root dockerfile that contains binaries for all services
This commit is contained in:
35
.dockerignore
Normal file
35
.dockerignore
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Build-context exclusions for the root Dockerfile.
#
# NOTE: unlike .gitignore, .dockerignore patterns are matched relative to the
# root of the build context. A bare `target/` only excludes the top-level
# directory; the `**/` prefix (which also matches zero directories) is needed
# to exclude the same name inside crates/ and integration/, both of which are
# copied wholesale into the builder stage.

# Git
.git
.gitignore

# Rust build output (workspace root and any per-crate target directories)
**/target/

# Documentation
# Only root-level markdown is excluded (this already covers README.md).
# Nested *.md files are kept in the context on purpose — NOTE(review): crates
# may embed them (e.g. via include_str!); confirm before widening to **/*.md.
*.md

# IDE
**/.vscode/
**/.idea/
**/*.swp
**/*.swo

# OS
**/.DS_Store
**/Thumbs.db

# Logs
**/*.log

# Environment (keep local secrets out of the build context at any depth)
**/.env
**/.env.*

# Dependencies
**/node_modules/

# Build artifacts
dist/
build/
**/.fastembed_cache
|
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -7041,7 +7041,7 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utils"
|
name = "utils"
|
||||||
version = "0.0.0"
|
version = "0.1.4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ab_glyph",
|
"ab_glyph",
|
||||||
"accelerate-src",
|
"accelerate-src",
|
||||||
|
50
Dockerfile
Normal file
50
Dockerfile
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
# Multi-stage build for the predict-otron-9000 workspace.
#
# Stage 1 (builder) compiles the three server binaries; stage 2 (runtime)
# ships only those binaries on a slim Debian base, running as a non-root user.

# ---- Build stage ----
# The builder is pinned to the same Debian release as the runtime stage
# (bookworm). The unqualified `rust:1` tag follows whichever Debian release the
# image maintainers currently default to, so binaries could end up linked
# against a newer glibc/OpenSSL than debian:bookworm-slim provides.
FROM rust:1-bookworm AS builder

# Install system dependencies needed to compile and link against OpenSSL.
# --no-install-recommends keeps optional packages out of the layer; the apt
# lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Create app directory
WORKDIR /app

# Copy workspace files (manifests first, then the member crates)
COPY Cargo.toml Cargo.lock ./
COPY crates/ ./crates/
COPY integration/ ./integration/

# Build all 3 main server binaries in release mode.
# --locked makes the build fail instead of silently re-resolving dependencies
# if Cargo.lock is out of date, so the image matches the committed lock file.
# --no-default-features applies to the selected packages; for
# predict-otron-9000 this turns off the default `ui` feature (chat-ui).
RUN cargo build --release --locked \
        -p predict-otron-9000 --bin predict-otron-9000 --no-default-features \
        -p embeddings-engine --bin embeddings-engine \
        -p inference-engine --bin inference-engine

# ---- Runtime stage ----
FROM debian:bookworm-slim AS runtime

# Install runtime dependencies (TLS roots and the OpenSSL shared library)
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libssl3 \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged system user whose home directory is /app
RUN useradd -r -s /bin/false -m -d /app appuser

# Set working directory
WORKDIR /app

# Copy binaries from builder stage, assigning ownership at copy time.
# This replaces a follow-up `RUN chmod +x ... && chown -R ...`, which would
# store a second copy of every binary in an extra layer. Cargo already emits
# the binaries with the executable bit set and COPY preserves file modes.
COPY --from=builder --chown=appuser:appuser \
     /app/target/release/predict-otron-9000 \
     /app/target/release/embeddings-engine \
     /app/target/release/inference-engine \
     ./bin/

# Switch to non-root user
USER appuser

# Expose ports (documentation only; adjust as needed based on your services)
EXPOSE 8080 8081 8082

# Default command (can be overridden, e.g. with ./bin/embeddings-engine or
# ./bin/inference-engine to run one of the other bundled services)
CMD ["./bin/predict-otron-9000"]
|
@@ -3,6 +3,7 @@ name = "chat-ui"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
crate-type = ["cdylib", "rlib"]
|
crate-type = ["cdylib", "rlib"]
|
||||||
|
|
||||||
@@ -122,3 +123,7 @@ lib-default-features = false
|
|||||||
#
|
#
|
||||||
# Optional. Defaults to "release".
|
# Optional. Defaults to "release".
|
||||||
lib-profile-release = "release"
|
lib-profile-release = "release"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "chat-ui"
|
||||||
|
path = "src/main.rs"
|
@@ -28,12 +28,13 @@ once_cell = "1.19.0"
|
|||||||
|
|
||||||
|
|
||||||
[package.metadata.compose]
|
[package.metadata.compose]
|
||||||
image = "ghcr.io/geoffsee/embeddings-service:latest"
|
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||||
port = 8080
|
port = 8080
|
||||||
|
cmd = ["./bin/embeddings-engine"]
|
||||||
|
|
||||||
# generates kubernetes manifests
|
# generates kubernetes manifests
|
||||||
[package.metadata.kube]
|
[package.metadata.kube]
|
||||||
image = "ghcr.io/geoffsee/embeddings-service:latest"
|
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||||
|
cmd = ["./bin/embeddings-engine"]
|
||||||
replicas = 1
|
replicas = 1
|
||||||
port = 8080
|
port = 8080
|
@@ -1,42 +0,0 @@
|
|||||||
# ---- Build stage ----
|
|
||||||
FROM rust:1-slim-bullseye AS builder
|
|
||||||
|
|
||||||
WORKDIR /usr/src/app
|
|
||||||
|
|
||||||
# Install build dependencies
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
pkg-config \
|
|
||||||
libssl-dev \
|
|
||||||
build-essential \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Cache deps first
|
|
||||||
COPY . ./
|
|
||||||
RUN rm -rf src
|
|
||||||
RUN mkdir src && echo "fn main() {}" > src/main.rs && echo "// lib" > src/lib.rs && cargo build --release
|
|
||||||
RUN rm -rf src
|
|
||||||
|
|
||||||
# Copy real sources and build
|
|
||||||
COPY . .
|
|
||||||
RUN cargo build --release
|
|
||||||
|
|
||||||
# ---- Runtime stage ----
|
|
||||||
FROM debian:bullseye-slim
|
|
||||||
|
|
||||||
# Install only what the compiled binary needs
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libssl1.1 \
|
|
||||||
ca-certificates \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Copy binary from builder
|
|
||||||
COPY --from=builder /usr/src/app/target/release/embeddings-engine /usr/local/bin/
|
|
||||||
|
|
||||||
# Run as non-root user for safety
|
|
||||||
RUN useradd -m appuser
|
|
||||||
USER appuser
|
|
||||||
|
|
||||||
EXPOSE 8080
|
|
||||||
CMD ["embeddings-engine"]
|
|
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "inference-engine"
|
name = "inference-engine"
|
||||||
version.workspace = true
|
version.workspace = true
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
candle-core = { git = "https://github.com/huggingface/candle.git" }
|
candle-core = { git = "https://github.com/huggingface/candle.git" }
|
||||||
@@ -31,14 +31,21 @@ utoipa = { version = "4.2.0", features = ["axum_extras"] }
|
|||||||
uuid = { version = "1.7.0", features = ["v4"] }
|
uuid = { version = "1.7.0", features = ["v4"] }
|
||||||
reborrow = "0.5.5"
|
reborrow = "0.5.5"
|
||||||
futures-util = "0.3.31"
|
futures-util = "0.3.31"
|
||||||
gemma-runner = { path = "../../integration/gemma-runner", features = ["metal"] }
|
gemma-runner = { path = "../../integration/gemma-runner" }
|
||||||
llama-runner = { path = "../../integration/llama-runner", features = ["metal"]}
|
llama-runner = { path = "../../integration/llama-runner" }
|
||||||
embeddings-engine = { path = "../embeddings-engine" }
|
embeddings-engine = { path = "../embeddings-engine" }
|
||||||
|
|
||||||
|
[target.'cfg(target_os = "linux")'.dependencies]
|
||||||
|
candle-core = { git = "https://github.com/huggingface/candle.git", default-features = false }
|
||||||
|
candle-nn = { git = "https://github.com/huggingface/candle.git", default-features = false }
|
||||||
|
candle-transformers = { git = "https://github.com/huggingface/candle.git", default-features = false }
|
||||||
|
|
||||||
[target.'cfg(target_os = "macos")'.dependencies]
|
[target.'cfg(target_os = "macos")'.dependencies]
|
||||||
candle-core = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
|
candle-core = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
|
||||||
candle-nn = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
|
candle-nn = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
|
||||||
candle-transformers = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
|
candle-transformers = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
|
||||||
|
gemma-runner = { path = "../../integration/gemma-runner", features = ["metal"] }
|
||||||
|
llama-runner = { path = "../../integration/llama-runner", features = ["metal"] }
|
||||||
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
@@ -62,15 +69,19 @@ bindgen_cuda = { version = "0.1.1", optional = true }
|
|||||||
[features]
|
[features]
|
||||||
bin = []
|
bin = []
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "inference-engine"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
|
||||||
[package.metadata.compose]
|
[package.metadata.compose]
|
||||||
image = "ghcr.io/geoffsee/inference-engine:latest"
|
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||||
|
cmd = ["./bin/inference-engine"]
|
||||||
port = 8080
|
port = 8080
|
||||||
|
|
||||||
|
|
||||||
# generates kubernetes manifests
|
# generates kubernetes manifests
|
||||||
[package.metadata.kube]
|
[package.metadata.kube]
|
||||||
image = "ghcr.io/geoffsee/inference-service:latest"
|
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||||
replicas = 1
|
cmd = ["./bin/inference-engine"]
|
||||||
port = 8080
|
port = 8080
|
||||||
|
replicas = 1
|
||||||
|
@@ -1,86 +0,0 @@
|
|||||||
# ---- Build stage ----
|
|
||||||
FROM rust:1-slim-bullseye AS builder
|
|
||||||
|
|
||||||
WORKDIR /usr/src/app
|
|
||||||
|
|
||||||
# Install build dependencies including CUDA toolkit for GPU support
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
pkg-config \
|
|
||||||
libssl-dev \
|
|
||||||
build-essential \
|
|
||||||
wget \
|
|
||||||
gnupg2 \
|
|
||||||
curl \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CUDA toolkit (optional, for GPU support)
|
|
||||||
# This is a minimal CUDA installation for building
|
|
||||||
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
|
|
||||||
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-minimal-build-11-8 \
|
|
||||||
libcublas-dev-11-8 \
|
|
||||||
libcurand-dev-11-8 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
|
||||||
&& rm cuda-keyring_1.0-1_all.deb
|
|
||||||
|
|
||||||
# Set CUDA environment variables
|
|
||||||
ENV CUDA_HOME=/usr/local/cuda
|
|
||||||
ENV PATH=${CUDA_HOME}/bin:${PATH}
|
|
||||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
|
||||||
|
|
||||||
# Copy the entire workspace to get access to all crates
|
|
||||||
COPY . ./
|
|
||||||
|
|
||||||
# Cache dependencies first - create dummy source files
|
|
||||||
RUN rm -rf crates/inference-engine/src
|
|
||||||
RUN mkdir -p crates/inference-engine/src && \
|
|
||||||
echo "fn main() {}" > crates/inference-engine/src/main.rs && \
|
|
||||||
echo "fn main() {}" > crates/inference-engine/src/cli_main.rs && \
|
|
||||||
echo "// lib" > crates/inference-engine/src/lib.rs && \
|
|
||||||
cargo build --release --bin cli --package inference-engine
|
|
||||||
|
|
||||||
# Remove dummy source and copy real sources
|
|
||||||
RUN rm -rf crates/inference-engine/src
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
# Build the actual CLI binary
|
|
||||||
RUN cargo build --release --bin cli --package inference-engine
|
|
||||||
|
|
||||||
# ---- Runtime stage ----
|
|
||||||
FROM debian:bullseye-slim
|
|
||||||
|
|
||||||
# Install runtime dependencies
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libssl1.1 \
|
|
||||||
ca-certificates \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CUDA runtime libraries (optional, for GPU support at runtime)
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
wget \
|
|
||||||
gnupg2 \
|
|
||||||
&& wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb \
|
|
||||||
&& dpkg -i cuda-keyring_1.0-1_all.deb \
|
|
||||||
&& apt-get update \
|
|
||||||
&& apt-get install -y --no-install-recommends \
|
|
||||||
cuda-cudart-11-8 \
|
|
||||||
libcublas11 \
|
|
||||||
libcurand10 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
|
||||||
&& rm cuda-keyring_1.0-1_all.deb \
|
|
||||||
&& apt-get purge -y wget gnupg2
|
|
||||||
|
|
||||||
# Copy binary from builder
|
|
||||||
COPY --from=builder /usr/src/app/target/release/cli /usr/local/bin/inference-cli
|
|
||||||
|
|
||||||
# Run as non-root user for safety
|
|
||||||
RUN useradd -m appuser
|
|
||||||
USER appuser
|
|
||||||
|
|
||||||
EXPOSE 8080
|
|
||||||
CMD ["inference-cli"]
|
|
23
crates/inference-engine/src/main.rs
Normal file
23
crates/inference-engine/src/main.rs
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
use inference_engine::{create_router, init_tracing, get_server_config, AppState};
|
||||||
|
use tokio::net::TcpListener;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
init_tracing();
|
||||||
|
|
||||||
|
let app_state = AppState::default();
|
||||||
|
let app = create_router(app_state);
|
||||||
|
|
||||||
|
let (server_host, server_port, server_address) = get_server_config();
|
||||||
|
let listener = TcpListener::bind(&server_address).await?;
|
||||||
|
|
||||||
|
info!("Inference Engine server starting on http://{}", server_address);
|
||||||
|
info!("Available endpoints:");
|
||||||
|
info!(" POST /v1/chat/completions - OpenAI-compatible chat completions");
|
||||||
|
info!(" GET /v1/models - List available models");
|
||||||
|
|
||||||
|
axum::serve(listener, app).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
@@ -29,7 +29,7 @@ inference-engine = { path = "../inference-engine" }
|
|||||||
|
|
||||||
# Dependencies for leptos web app
|
# Dependencies for leptos web app
|
||||||
#leptos-app = { path = "../leptos-app", features = ["ssr"] }
|
#leptos-app = { path = "../leptos-app", features = ["ssr"] }
|
||||||
chat-ui = { path = "../chat-ui", features = ["ssr", "hydrate"], optional = false }
|
chat-ui = { path = "../chat-ui", features = ["ssr", "hydrate"], optional = true }
|
||||||
|
|
||||||
mime_guess = "2.0.5"
|
mime_guess = "2.0.5"
|
||||||
log = "0.4.27"
|
log = "0.4.27"
|
||||||
@@ -39,15 +39,20 @@ log = "0.4.27"
|
|||||||
name = "predict-otron-9000"
|
name = "predict-otron-9000"
|
||||||
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||||
port = 8080
|
port = 8080
|
||||||
|
cmd = ["./bin/predict-otron-9000"]
|
||||||
|
|
||||||
# generates kubernetes manifests
|
# generates kubernetes manifests
|
||||||
[package.metadata.kube]
|
[package.metadata.kube]
|
||||||
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
image = "ghcr.io/geoffsee/predict-otron-9000:latest"
|
||||||
replicas = 1
|
replicas = 1
|
||||||
port = 8080
|
port = 8080
|
||||||
|
cmd = ["./bin/predict-otron-9000"]
|
||||||
# SERVER_CONFIG Example: {\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}
|
# SERVER_CONFIG Example: {\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}
|
||||||
# you can generate this via node to avoid toil
|
# you can generate this via node to avoid toil
|
||||||
# const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
|
# const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
|
||||||
# console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
|
# console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
|
||||||
env = { SERVER_CONFIG = "<your-json-value-here>" }
|
env = { SERVER_CONFIG = "<your-json-value-here>" }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["ui"]
|
||||||
|
ui = ["dep:chat-ui"]
|
||||||
|
@@ -1,89 +0,0 @@
|
|||||||
# ---- Build stage ----
|
|
||||||
FROM rust:1-slim-bullseye AS builder
|
|
||||||
|
|
||||||
WORKDIR /usr/src/app
|
|
||||||
|
|
||||||
# Install build dependencies including CUDA toolkit for GPU support (needed for inference-engine dependency)
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
pkg-config \
|
|
||||||
libssl-dev \
|
|
||||||
build-essential \
|
|
||||||
wget \
|
|
||||||
gnupg2 \
|
|
||||||
curl \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CUDA toolkit (required for inference-engine dependency)
|
|
||||||
# This is a minimal CUDA installation for building
|
|
||||||
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
|
|
||||||
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-minimal-build-11-8 \
|
|
||||||
libcublas-dev-11-8 \
|
|
||||||
libcurand-dev-11-8 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
|
||||||
&& rm cuda-keyring_1.0-1_all.deb
|
|
||||||
|
|
||||||
# Set CUDA environment variables
|
|
||||||
ENV CUDA_HOME=/usr/local/cuda
|
|
||||||
ENV PATH=${CUDA_HOME}/bin:${PATH}
|
|
||||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
|
||||||
|
|
||||||
# Copy the entire workspace to get access to all crates (needed for local dependencies)
|
|
||||||
COPY . ./
|
|
||||||
|
|
||||||
# Cache dependencies first - create dummy source files for all crates
|
|
||||||
RUN rm -rf crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src
|
|
||||||
RUN mkdir -p crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src && \
|
|
||||||
echo "fn main() {}" > crates/predict-otron-9000/src/main.rs && \
|
|
||||||
echo "fn main() {}" > crates/inference-engine/src/main.rs && \
|
|
||||||
echo "fn main() {}" > crates/inference-engine/src/cli_main.rs && \
|
|
||||||
echo "// lib" > crates/inference-engine/src/lib.rs && \
|
|
||||||
echo "fn main() {}" > crates/embeddings-engine/src/main.rs && \
|
|
||||||
echo "// lib" > crates/embeddings-engine/src/lib.rs && \
|
|
||||||
cargo build --release --bin predict-otron-9000 --package predict-otron-9000
|
|
||||||
|
|
||||||
# Remove dummy sources and copy real sources
|
|
||||||
RUN rm -rf crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
# Build the actual binary
|
|
||||||
RUN cargo build --release --bin predict-otron-9000 --package predict-otron-9000
|
|
||||||
|
|
||||||
# ---- Runtime stage ----
|
|
||||||
FROM debian:bullseye-slim
|
|
||||||
|
|
||||||
# Install runtime dependencies
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libssl1.1 \
|
|
||||||
ca-certificates \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CUDA runtime libraries (required for inference-engine dependency)
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
wget \
|
|
||||||
gnupg2 \
|
|
||||||
&& wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb \
|
|
||||||
&& dpkg -i cuda-keyring_1.0-1_all.deb \
|
|
||||||
&& apt-get update \
|
|
||||||
&& apt-get install -y --no-install-recommends \
|
|
||||||
cuda-cudart-11-8 \
|
|
||||||
libcublas11 \
|
|
||||||
libcurand10 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
|
||||||
&& rm cuda-keyring_1.0-1_all.deb \
|
|
||||||
&& apt-get purge -y wget gnupg2
|
|
||||||
|
|
||||||
# Copy binary from builder
|
|
||||||
COPY --from=builder /usr/src/app/target/release/predict-otron-9000 /usr/local/bin/
|
|
||||||
|
|
||||||
# Run as non-root user for safety
|
|
||||||
RUN useradd -m appuser
|
|
||||||
USER appuser
|
|
||||||
|
|
||||||
EXPOSE 8080
|
|
||||||
CMD ["predict-otron-9000"]
|
|
@@ -4,27 +4,32 @@ mod middleware;
|
|||||||
mod standalone_mode;
|
mod standalone_mode;
|
||||||
|
|
||||||
use crate::standalone_mode::create_standalone_router;
|
use crate::standalone_mode::create_standalone_router;
|
||||||
use axum::http::StatusCode as AxumStatusCode;
|
|
||||||
use axum::http::header;
|
|
||||||
use axum::response::IntoResponse;
|
|
||||||
use axum::routing::get;
|
use axum::routing::get;
|
||||||
use axum::{Router, http::Uri, response::Html, serve};
|
use axum::{Router, serve};
|
||||||
use config::ServerConfig;
|
use config::ServerConfig;
|
||||||
use ha_mode::create_ha_router;
|
use ha_mode::create_ha_router;
|
||||||
use inference_engine::AppState;
|
|
||||||
use log::info;
|
|
||||||
use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
|
use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
|
||||||
use mime_guess::from_path;
|
|
||||||
use rust_embed::Embed;
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::path::Component::ParentDir;
|
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
use axum::http::StatusCode as AxumStatusCode;
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
use axum::http::header;
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
use axum::response::IntoResponse;
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
use axum::http::Uri;
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
use mime_guess::from_path;
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
use rust_embed::Embed;
|
||||||
use tokio::net::TcpListener;
|
use tokio::net::TcpListener;
|
||||||
use tower::MakeService;
|
|
||||||
use tower_http::classify::ServerErrorsFailureClass::StatusCode;
|
|
||||||
use tower_http::cors::{Any, CorsLayer};
|
use tower_http::cors::{Any, CorsLayer};
|
||||||
use tower_http::trace::TraceLayer;
|
use tower_http::trace::TraceLayer;
|
||||||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
#[derive(Embed)]
|
#[derive(Embed)]
|
||||||
#[folder = "../../target/site"]
|
#[folder = "../../target/site"]
|
||||||
#[include = "*.js"]
|
#[include = "*.js"]
|
||||||
@@ -33,6 +38,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
|||||||
#[include = "*.ico"]
|
#[include = "*.ico"]
|
||||||
struct Asset;
|
struct Asset;
|
||||||
|
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
async fn static_handler(uri: Uri) -> axum::response::Response {
|
async fn static_handler(uri: Uri) -> axum::response::Response {
|
||||||
// Strip the leading `/`
|
// Strip the leading `/`
|
||||||
let path = uri.path().trim_start_matches('/');
|
let path = uri.path().trim_start_matches('/');
|
||||||
@@ -110,17 +116,22 @@ async fn main() {
|
|||||||
// Create metrics layer
|
// Create metrics layer
|
||||||
let metrics_layer = MetricsLayer::new(metrics_store);
|
let metrics_layer = MetricsLayer::new(metrics_store);
|
||||||
|
|
||||||
let leptos_config = chat_ui::app::AppConfig::default();
|
|
||||||
|
|
||||||
// Create the leptos router for the web frontend
|
|
||||||
let leptos_router = chat_ui::app::create_router(leptos_config.config.leptos_options);
|
|
||||||
|
|
||||||
// Merge the service router with base routes and add middleware layers
|
// Merge the service router with base routes and add middleware layers
|
||||||
let app = Router::new()
|
let mut app = Router::new()
|
||||||
.route("/pkg/{*path}", get(static_handler))
|
|
||||||
.route("/health", get(|| async { "ok" }))
|
.route("/health", get(|| async { "ok" }))
|
||||||
.merge(service_router)
|
.merge(service_router);
|
||||||
.merge(leptos_router)
|
|
||||||
|
// Add UI routes if the UI feature is enabled
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
|
{
|
||||||
|
let leptos_config = chat_ui::app::AppConfig::default();
|
||||||
|
let leptos_router = chat_ui::app::create_router(leptos_config.config.leptos_options);
|
||||||
|
app = app
|
||||||
|
.route("/pkg/{*path}", get(static_handler))
|
||||||
|
.merge(leptos_router);
|
||||||
|
}
|
||||||
|
|
||||||
|
let app = app
|
||||||
.layer(metrics_layer) // Add metrics tracking
|
.layer(metrics_layer) // Add metrics tracking
|
||||||
.layer(cors)
|
.layer(cors)
|
||||||
.layer(TraceLayer::new_for_http());
|
.layer(TraceLayer::new_for_http());
|
||||||
@@ -141,6 +152,7 @@ async fn main() {
|
|||||||
);
|
);
|
||||||
tracing::info!("Performance metrics tracking enabled - summary logs every 60 seconds");
|
tracing::info!("Performance metrics tracking enabled - summary logs every 60 seconds");
|
||||||
tracing::info!("Available endpoints:");
|
tracing::info!("Available endpoints:");
|
||||||
|
#[cfg(feature = "ui")]
|
||||||
tracing::info!(" GET / - Leptos chat web application");
|
tracing::info!(" GET / - Leptos chat web application");
|
||||||
tracing::info!(" GET /health - Health check");
|
tracing::info!(" GET /health - Health check");
|
||||||
tracing::info!(" POST /v1/models - List Models");
|
tracing::info!(" POST /v1/models - List Models");
|
||||||
|
@@ -1,17 +1,15 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "utils"
|
name = "utils"
|
||||||
|
version = "0.1.4"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "src/lib.rs"
|
path = "src/lib.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
accelerate-src = {version = "0.3.2", optional = true }
|
accelerate-src = {version = "0.3.2", optional = true }
|
||||||
candle-nn = {version = "0.9.1" }
|
|
||||||
candle-transformers = {version = "0.9.1" }
|
|
||||||
|
|
||||||
candle-flash-attn = {version = "0.9.1", optional = true }
|
candle-flash-attn = {version = "0.9.1", optional = true }
|
||||||
candle-onnx = {version = "0.9.1", optional = true }
|
candle-onnx = {version = "0.9.1", optional = true }
|
||||||
candle-core="0.9.1"
|
|
||||||
csv = "1.3.0"
|
csv = "1.3.0"
|
||||||
anyhow = "1.0.99"
|
anyhow = "1.0.99"
|
||||||
cudarc = {version = "0.17.3", optional = true }
|
cudarc = {version = "0.17.3", optional = true }
|
||||||
@@ -86,3 +84,14 @@ mimi = ["cpal", "symphonia", "rubato"]
|
|||||||
snac = ["cpal", "symphonia", "rubato"]
|
snac = ["cpal", "symphonia", "rubato"]
|
||||||
depth_anything_v2 = ["palette", "enterpolation"]
|
depth_anything_v2 = ["palette", "enterpolation"]
|
||||||
tekken = ["tekken-rs"]
|
tekken = ["tekken-rs"]
|
||||||
|
|
||||||
|
# Platform-specific candle dependencies
|
||||||
|
[target.'cfg(target_os = "linux")'.dependencies]
|
||||||
|
candle-nn = {version = "0.9.1", default-features = false }
|
||||||
|
candle-transformers = {version = "0.9.1", default-features = false }
|
||||||
|
candle-core = {version = "0.9.1", default-features = false }
|
||||||
|
|
||||||
|
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
||||||
|
candle-nn = {version = "0.9.1" }
|
||||||
|
candle-transformers = {version = "0.9.1" }
|
||||||
|
candle-core = {version = "0.9.1" }
|
Reference in New Issue
Block a user