mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
54 lines
1.4 KiB
YAML
services:
  # Main orchestration server - handles inference and embeddings
  predict-otron-9000:
    build:
      context: .
      dockerfile: crates/predict-otron-9000/Dockerfile
    ports:
      - "8080:8080"
    environment:
      - SERVER_PORT=8080
      - RUST_LOG=${RUST_LOG:-info}
      - HF_TOKEN=${HF_TOKEN}
      - HF_HOME=/app/.hf-cache
    volumes:
      # Mount HF cache to persist downloaded models
      - hf-cache:/app/.hf-cache
      # Mount FastEmbed cache for embeddings
      - fastembed-cache:/app/.fastembed_cache
    networks:
      - predict-otron-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080"]
      interval: 5s
      timeout: 1s
      retries: 10
      start_period: 10s

  # Web frontend - Leptos WASM chat interface
  leptos-chat:
    build:
      context: crates/leptos-chat
      dockerfile: Dockerfile
    ports:
      - "8788:8788"
    depends_on:
      # Wait until the backend passes its healthcheck before starting
      predict-otron-9000:
        condition: service_healthy
    networks:
      - predict-otron-network
    environment:
      # Configure API endpoint for the frontend to connect to backend
      - API_BASE_URL=http://predict-otron-9000:8080
volumes:
  # Persistent storage for Hugging Face model cache
  hf-cache:
    driver: local
  # Persistent storage for FastEmbed model cache
  fastembed-cache:
    driver: local
networks:
  # Shared bridge network so the frontend can reach the backend by service name
  predict-otron-network:
    driver: bridge