mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
50 lines
1.4 KiB
Bash
Executable File
50 lines
1.4 KiB
Bash
Executable File
#!/usr/bin/env bash
set -euo pipefail

# Simple curl helper for streaming chat completions (SSE).
#
# Usage:
#   scripts/curl_chat_stream.sh "Who was the 16th president of the United States?"
#   MODEL_ID=google/gemma-2b-it scripts/curl_chat_stream.sh "Hello!"
#
# Arguments:
#   $1  prompt text (default: "What is the capital of France?")
#
# Environment overrides (defaults in parentheses):
#   SERVER_URL       base URL the requests are sent to (http://localhost:8080)
#   MODEL_ID         value of the "model" request field (gemma-3-1b-it)
#   MAX_TOKENS       value of the "max_tokens" request field (128)
#   CONNECT_TIMEOUT  seconds passed to curl --connect-timeout (10)
#   MAX_TIME         seconds passed to curl --max-time for the request (30)
#
# Exit status:
#   2  MAX_TOKENS is not a non-negative integer

SERVER_URL=${SERVER_URL:-http://localhost:8080}
MODEL_ID=${MODEL_ID:-gemma-3-1b-it}
PROMPT=${1:-"What is the capital of France?"}
MAX_TOKENS=${MAX_TOKENS:-128}

# Timeout controls (seconds)
CONNECT_TIMEOUT=${CONNECT_TIMEOUT:-10}
MAX_TIME=${MAX_TIME:-30}

# Returns 0 when $1 is a valid JSON non-negative integer literal
# (digits only, no sign, no leading zeros), non-zero otherwise.
is_uint() {
  local re='^(0|[1-9][0-9]*)$'
  [[ ${1-} =~ $re ]]
}

# MAX_TOKENS is written unquoted into the JSON request body, so anything other
# than a plain integer would produce a malformed (or injected) payload.
if ! is_uint "$MAX_TOKENS"; then
  printf '[error] MAX_TOKENS must be a non-negative integer, got: %s\n' \
    "$MAX_TOKENS" >&2
  exit 2
fi

# Diagnostics go to stderr so stdout carries only the streamed response.
cat >&2 <<EOF
[info] POST $SERVER_URL/v1/chat/completions (SSE)
[info] model=$MODEL_ID, max_tokens=$MAX_TOKENS
[info] prompt=$PROMPT
[info] timeouts: connect=${CONNECT_TIMEOUT}s, max=${MAX_TIME}s
EOF

# Quick preflight to avoid long hangs when server is down.
# Fetches "$SERVER_URL/" with the body discarded and keeps only the HTTP status
# code that curl prints via -w. Both the connect and the total time limit use
# CONNECT_TIMEOUT so the probe stays short. If curl itself exits non-zero the
# status is forced to 000, which never matches below.
preflight_status=$(
  curl -sS -o /dev/null -w '%{http_code}' \
    --connect-timeout "$CONNECT_TIMEOUT" \
    --max-time "$CONNECT_TIMEOUT" \
    "$SERVER_URL/"
) || preflight_status=000

# Only 200 or any 3xx counts as "server is up"; everything else aborts.
case "$preflight_status" in
  200 | 3??) ;;
  *)
    # Diagnostics go to stderr so they never mix with response data on stdout.
    echo "[warn] Server not reachable at $SERVER_URL (preflight failed)." >&2
    echo "[hint] Start it with ./run_server.sh or adjust SERVER_URL." >&2
    # NOTE(review): 7 is also curl's own "failed to connect" exit code —
    # presumably chosen to match; confirm before changing.
    exit 7
    ;;
esac

# Escapes $1 for use inside a JSON string literal and prints the result
# (without the surrounding quotes, no trailing newline).
# Handles backslash, double quote, newline, carriage return, tab, backspace
# and form feed; other control characters are passed through unchanged.
json_escape() {
  local s=${1-}
  s=${s//\\/\\\\} # must run first so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//$'\b'/\\b}
  s=${s//$'\f'/\\f}
  printf '%s' "$s"
}

# Escape the free-text fields before they are placed between JSON quotes;
# a raw quote, backslash or newline in the prompt would otherwise break the
# request body or let the prompt inject extra fields.
model_json=$(json_escape "$MODEL_ID")
prompt_json=$(json_escape "$PROMPT")

# POST the chat completion request and stream the reply to stdout.
# -N turns off curl's output buffering so chunks are shown as they arrive;
# -d @- reads the request body from the here-document on stdin.
curl -N -sS -X POST \
  --connect-timeout "$CONNECT_TIMEOUT" \
  --max-time "$MAX_TIME" \
  -H "Content-Type: application/json" \
  "$SERVER_URL/v1/chat/completions" \
  -d @- <<JSON
{
  "model": "${model_json}",
  "messages": [
    {"role": "user", "content": "${prompt_json}"}
  ],
  "max_tokens": ${MAX_TOKENS},
  "stream": true
}
JSON

# Finish the output with a newline once curl returns.
echo