Remove ROOT_CAUSE_ANALYSIS.md and outdated server logs

This commit is contained in:
geoffsee
2025-08-28 08:26:18 -04:00
parent b606adbe5d
commit c8b3561e36
11 changed files with 220 additions and 547 deletions

View File

@@ -0,0 +1,42 @@
# Two-stage image for the `embeddings-engine` binary: compile it with the Rust
# toolchain image, then copy only the release binary onto a slim Debian base.

# ---- Build stage ----
FROM rust:1-slim-bullseye AS builder
WORKDIR /usr/src/app

# Install build dependencies: pkg-config, the OpenSSL development headers and
# the C toolchain. The apt lists are removed in the same layer to keep it small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        pkg-config \
        libssl-dev \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Cache deps first: build once against placeholder sources so the dependency
# graph is already compiled when the real sources are built below.
# NOTE(review): the whole build context is copied before the placeholder build,
# so any file change invalidates this layer. Copying only the Cargo manifests
# here would make the cache effective; confirm the manifest layout first.
COPY . ./
RUN rm -rf src
RUN mkdir src && echo "fn main() {}" > src/main.rs && echo "// lib" > src/lib.rs && cargo build --release
RUN rm -rf src

# Copy real sources and build.
# COPY keeps the files' original modification times, which are older than the
# artifacts produced by the placeholder build. Cargo's freshness check is
# mtime-based, so without bumping the timestamps it can consider the crate up
# to date and leave the empty placeholder binary in target/release.
COPY . .
RUN find src -name '*.rs' -exec touch {} + && cargo build --release

# ---- Runtime stage ----
FROM debian:bullseye-slim

# Install only what the compiled binary needs: the OpenSSL 1.1 shared library
# and the CA certificate bundle.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libssl1.1 \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Copy binary from builder
COPY --from=builder /usr/src/app/target/release/embeddings-engine /usr/local/bin/

# Run as non-root user for safety
RUN useradd -m appuser
USER appuser

# EXPOSE is documentation only; the port still has to be published at run time.
EXPOSE 8080
CMD ["embeddings-engine"]

View File

@@ -25,10 +25,6 @@ static EMBEDDING_MODEL: Lazy<TextEmbedding> = Lazy::new(|| {
model
});
/// Returns a fixed plain-text greeting.
///
/// Takes no input and always resolves to the same `'static` string.
pub async fn root() -> &'static str {
    const GREETING: &str = "Hello, World!";
    GREETING
}
pub async fn embeddings_create(
Json(payload): Json<CreateEmbeddingRequest>,
) -> ResponseJson<serde_json::Value> {

View File

@@ -13,9 +13,6 @@ use tracing;
/// Default server host, as a string: the IPv4 loopback address.
// NOTE(review): presumably the fallback bind address; the code that reads this
// constant is outside this hunk. A loopback-only bind is not reachable through
// a container's published port, so confirm it is overridden in container runs.
const DEFAULT_SERVER_HOST: &str = "127.0.0.1";
/// Default server port, kept as a string rather than a number.
// NOTE(review): presumably parsed or concatenated with the host by the caller — confirm.
const DEFAULT_SERVER_PORT: &str = "8080";
/// Returns the fixed greeting text.
///
/// NOTE(review): presumably served at `/`; the route wiring is not visible in
/// this hunk.
async fn root() -> &'static str {
    let greeting: &'static str = "Hello, World!";
    greeting
}
async fn embeddings_create(
Json(payload): Json<CreateEmbeddingRequest>,
@@ -162,24 +159,6 @@ mod tests {
use axum::http::StatusCode;
use tower::ServiceExt;
// Sends a request for `/` through the app and expects a 200 response whose
// body is exactly the greeting bytes.
#[tokio::test]
async fn test_root() {
    let router = create_app();
    let request = axum::http::Request::builder()
        .uri("/")
        .body(Body::empty())
        .unwrap();

    let reply = router.oneshot(request).await.unwrap();
    assert_eq!(reply.status(), StatusCode::OK);

    // Read the whole body; no size cap is applied (limit is usize::MAX).
    let payload = to_bytes(reply.into_body(), usize::MAX).await.unwrap();
    assert_eq!(&payload[..], b"Hello, World!");
}
#[tokio::test]
async fn test_embeddings_create() {
// Start a test server

View File

@@ -0,0 +1,86 @@
# Two-stage image for the `cli` binary of the `inference-engine` package:
# compile with the Rust toolchain plus a minimal CUDA 11.8 build environment,
# then ship the binary (renamed to `inference-cli`) with the CUDA runtime
# libraries on a slim Debian base.

# ---- Build stage ----
FROM rust:1-slim-bullseye AS builder
WORKDIR /usr/src/app

# Install build dependencies, plus the download tools (wget, gnupg2, curl)
# used to add the NVIDIA package repository below.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        pkg-config \
        libssl-dev \
        build-essential \
        wget \
        gnupg2 \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA toolkit (for GPU support).
# This is a minimal CUDA installation for building: the keyring package
# registers NVIDIA's Debian 11 repository, from which the compiler subset and
# the cuBLAS / cuRAND development packages are installed.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
    dpkg -i cuda-keyring_1.0-1_all.deb && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        cuda-minimal-build-11-8 \
        libcublas-dev-11-8 \
        libcurand-dev-11-8 \
    && rm -rf /var/lib/apt/lists/* \
    && rm cuda-keyring_1.0-1_all.deb

# Set CUDA environment variables
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
# Append the previous value only when it is non-empty. A bare trailing ':'
# would add an empty entry, which the dynamic loader treats as the current
# directory.
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Copy the entire workspace to get access to all crates
# NOTE(review): because the full context is copied before the placeholder
# build, any file change invalidates the dependency-cache layer below.
COPY . ./

# Cache dependencies first - create dummy source files
RUN rm -rf crates/inference-engine/src
RUN mkdir -p crates/inference-engine/src && \
    echo "fn main() {}" > crates/inference-engine/src/main.rs && \
    echo "fn main() {}" > crates/inference-engine/src/cli_main.rs && \
    echo "// lib" > crates/inference-engine/src/lib.rs && \
    cargo build --release --bin cli --package inference-engine

# Remove dummy source and copy real sources
RUN rm -rf crates/inference-engine/src
COPY . .

# Build the actual CLI binary.
# COPY keeps the files' original modification times, which are older than the
# artifacts from the placeholder build. Cargo's freshness check is mtime-based,
# so the timestamps are bumped first; otherwise it can skip recompiling and
# leave the empty placeholder binary in target/release.
RUN find crates/inference-engine/src -name '*.rs' -exec touch {} + && \
    cargo build --release --bin cli --package inference-engine

# ---- Runtime stage ----
FROM debian:bullseye-slim

# Install runtime dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libssl1.1 \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA runtime libraries (for GPU support at runtime).
# wget and gnupg2 are only needed to register the NVIDIA repository and are
# purged again in the same layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        wget \
        gnupg2 \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        cuda-cudart-11-8 \
        libcublas11 \
        libcurand10 \
    && rm -rf /var/lib/apt/lists/* \
    && rm cuda-keyring_1.0-1_all.deb \
    && apt-get purge -y wget gnupg2

# Copy binary from builder
COPY --from=builder /usr/src/app/target/release/cli /usr/local/bin/inference-cli

# Run as non-root user for safety
RUN useradd -m appuser
USER appuser

# EXPOSE is documentation only; the port still has to be published at run time.
EXPOSE 8080
CMD ["inference-cli"]

View File

@@ -0,0 +1,89 @@
# Two-stage image for the `predict-otron-9000` binary: compile with the Rust
# toolchain plus a minimal CUDA 11.8 build environment, then ship the binary
# with the CUDA runtime libraries on a slim Debian base.

# ---- Build stage ----
FROM rust:1-slim-bullseye AS builder
WORKDIR /usr/src/app

# Install build dependencies including the tools used to add the CUDA
# repository (needed for inference-engine dependency)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        pkg-config \
        libssl-dev \
        build-essential \
        wget \
        gnupg2 \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA toolkit (required for inference-engine dependency)
# This is a minimal CUDA installation for building: the keyring package
# registers NVIDIA's Debian 11 repository, from which the compiler subset and
# the cuBLAS / cuRAND development packages are installed.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
    dpkg -i cuda-keyring_1.0-1_all.deb && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        cuda-minimal-build-11-8 \
        libcublas-dev-11-8 \
        libcurand-dev-11-8 \
    && rm -rf /var/lib/apt/lists/* \
    && rm cuda-keyring_1.0-1_all.deb

# Set CUDA environment variables
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
# Append the previous value only when it is non-empty. A bare trailing ':'
# would add an empty entry, which the dynamic loader treats as the current
# directory.
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Copy the entire workspace to get access to all crates (needed for local dependencies)
# NOTE(review): because the full context is copied before the placeholder
# build, any file change invalidates the dependency-cache layer below.
COPY . ./

# Cache dependencies first - create dummy source files for all crates
RUN rm -rf crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src
RUN mkdir -p crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src && \
    echo "fn main() {}" > crates/predict-otron-9000/src/main.rs && \
    echo "fn main() {}" > crates/inference-engine/src/main.rs && \
    echo "fn main() {}" > crates/inference-engine/src/cli_main.rs && \
    echo "// lib" > crates/inference-engine/src/lib.rs && \
    echo "fn main() {}" > crates/embeddings-engine/src/main.rs && \
    echo "// lib" > crates/embeddings-engine/src/lib.rs && \
    cargo build --release --bin predict-otron-9000 --package predict-otron-9000

# Remove dummy sources and copy real sources
RUN rm -rf crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src
COPY . .

# Build the actual binary.
# COPY keeps the files' original modification times, which are older than the
# artifacts from the placeholder build. Cargo's freshness check is mtime-based,
# so the timestamps of all three replaced source trees are bumped first;
# otherwise it can skip recompiling and leave the empty placeholder binary in
# target/release.
RUN find crates/predict-otron-9000/src crates/inference-engine/src crates/embeddings-engine/src \
        -name '*.rs' -exec touch {} + && \
    cargo build --release --bin predict-otron-9000 --package predict-otron-9000

# ---- Runtime stage ----
FROM debian:bullseye-slim

# Install runtime dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libssl1.1 \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA runtime libraries (required for inference-engine dependency).
# wget and gnupg2 are only needed to register the NVIDIA repository and are
# purged again in the same layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        wget \
        gnupg2 \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        cuda-cudart-11-8 \
        libcublas11 \
        libcurand10 \
    && rm -rf /var/lib/apt/lists/* \
    && rm cuda-keyring_1.0-1_all.deb \
    && apt-get purge -y wget gnupg2

# Copy binary from builder
COPY --from=builder /usr/src/app/target/release/predict-otron-9000 /usr/local/bin/

# Run as non-root user for safety
RUN useradd -m appuser
USER appuser

# EXPOSE is documentation only; the port still has to be published at run time.
EXPOSE 8080
CMD ["predict-otron-9000"]