Files
predict-otron-9001/docker-compose.yml
2025-08-28 12:06:22 -04:00

38 lines
967 B
YAML

# Compose definition for the predict-otron stack: one application service,
# two named volumes for model caches, and a dedicated bridge network.
services:
  # Main orchestration server - handles inference and embeddings
  predict-otron-9000:
    build:
      # Build context is the directory containing this compose file; the
      # Dockerfile path below is resolved relative to it.
      context: .
      dockerfile: crates/predict-otron-9000/Dockerfile
    ports:
      # host:container. Quoted so the mapping is always read as a string.
      - "8080:8080"
    environment:
      # NOTE(review): presumably the port the server binds inside the
      # container; keep it in sync with `ports` and the healthcheck URL.
      - SERVER_PORT=8080
      # Log filter; falls back to `info` when RUST_LOG is unset or empty.
      - RUST_LOG=${RUST_LOG:-info}
      # Optional Hugging Face token taken from the host environment. The
      # `:-` default resolves to an empty string when the variable is unset,
      # without Compose printing an "is not set" warning on every command.
      - HF_TOKEN=${HF_TOKEN:-}
      # Points the Hugging Face cache at the `hf-cache` volume mounted below.
      - HF_HOME=/app/.hf-cache
    volumes:
      # Mount HF cache to persist downloaded models
      - hf-cache:/app/.hf-cache
      # Mount FastEmbed cache for embeddings
      - fastembed-cache:/app/.fastembed_cache
    networks:
      - predict-otron-network
    healthcheck:
      # Probe the HTTP root from inside the container; `curl -f` exits
      # non-zero on HTTP errors, which marks the check as failed.
      # NOTE(review): requires `curl` in the image - confirm the Dockerfile
      # installs it.
      test: ["CMD", "curl", "-f", "http://localhost:8080"]
      interval: 5s
      timeout: 1s
      retries: 10
      # Failures during this window do not count toward `retries`.
      start_period: 10s

volumes:
  # Persistent storage for Hugging Face model cache
  hf-cache:
    driver: local
  # Persistent storage for FastEmbed model cache
  fastembed-cache:
    driver: local

networks:
  predict-otron-network:
    driver: bridge