move cli into crates and stage for release
@@ -1,389 +0,0 @@
#!/bin/bash

# Cross-platform build script for predict-otron-9000
# Builds all workspace crates for common platforms

set -euo pipefail

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUILD_DIR="${PROJECT_ROOT}/build"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Supported platforms
PLATFORMS=(
    "x86_64-unknown-linux-gnu"
    "x86_64-pc-windows-msvc"
    "x86_64-apple-darwin"
    "aarch64-apple-darwin"
    "aarch64-unknown-linux-gnu"
)

# Main binaries to build
MAIN_BINARIES=(
    "predict-otron-9000"
    "embeddings-engine"
)

# Inference engine binaries (with bin feature)
INFERENCE_BINARIES=(
    "gemma_inference"
    "llama_inference"
)

# Other workspace binaries
OTHER_BINARIES=(
    "helm-chart-tool"
)

print_header() {
    echo -e "${BLUE}================================${NC}"
    echo -e "${BLUE}$1${NC}"
    echo -e "${BLUE}================================${NC}"
}

print_info() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

print_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

check_dependencies() {
    print_header "Checking Dependencies"

    # Check rust
    if ! command -v cargo >/dev/null 2>&1; then
        print_error "Rust/Cargo is not installed"
        exit 1
    fi

    # Check cargo-leptos for WASM frontend
    if ! command -v cargo-leptos >/dev/null 2>&1; then
        print_warn "cargo-leptos not found. Installing..."
        cargo install cargo-leptos
    fi

    print_info "All dependencies available"
}

install_targets() {
    print_header "Installing Rust Targets"

    for platform in "${PLATFORMS[@]}"; do
        print_info "Installing target: $platform"
        rustup target add "$platform" || {
            print_warn "Failed to install target $platform (may not be available on this host)"
        }
    done

    # Add WASM target for leptos
    print_info "Installing wasm32-unknown-unknown target for Leptos"
    rustup target add wasm32-unknown-unknown
}

create_build_dirs() {
    print_header "Setting up Build Directory"

    rm -rf "$BUILD_DIR"
    mkdir -p "$BUILD_DIR"

    for platform in "${PLATFORMS[@]}"; do
        mkdir -p "$BUILD_DIR/$platform"
    done

    mkdir -p "$BUILD_DIR/web"
    print_info "Build directories created"
}

build_leptos_app() {
    print_header "Building Leptos Web Frontend"

    cd "$PROJECT_ROOT/crates/leptos-app"

    # Build the WASM frontend
    print_info "Building WASM frontend with cargo-leptos..."
    cargo leptos build --release || {
        print_error "Failed to build Leptos WASM frontend"
        return 1
    }

    # Copy built assets to build directory
    if [ -d "target/site" ]; then
        cp -r target/site/* "$BUILD_DIR/web/"
        print_info "Leptos frontend built and copied to $BUILD_DIR/web/"
    else
        print_error "Leptos build output not found at target/site"
        return 1
    fi

    cd "$PROJECT_ROOT"
}

get_platform_features() {
    local platform="$1"
    local features=""

    case "$platform" in
        *-apple-darwin)
            # macOS uses Metal but routes to CPU for Gemma stability
            features=""
            ;;
        *-unknown-linux-gnu|*-pc-windows-msvc)
            # Linux and Windows can use CUDA if available
            features=""
            ;;
        *)
            features=""
            ;;
    esac

    echo "$features"
}

build_binary_for_platform() {
    local binary_name="$1"
    local platform="$2"
    local package_name="$3"
    local additional_args="$4"

    print_info "Building $binary_name for $platform"

    local features=$(get_platform_features "$platform")
    local feature_flag=""
    if [ -n "$features" ]; then
        feature_flag="--features $features"
    fi

    # Build command
    local build_cmd="cargo build --release --target $platform --bin $binary_name"

    if [ -n "$package_name" ]; then
        build_cmd="$build_cmd --package $package_name"
    fi

    if [ -n "$additional_args" ]; then
        build_cmd="$build_cmd $additional_args"
    fi

    if [ -n "$feature_flag" ]; then
        build_cmd="$build_cmd $feature_flag"
    fi

    print_info "Running: $build_cmd"

    if eval "$build_cmd"; then
        # Copy binary to build directory
        local target_dir="target/$platform/release"
        local binary_file="$binary_name"

        # Add .exe extension for Windows
        if [[ "$platform" == *-pc-windows-msvc ]]; then
            binary_file="$binary_name.exe"
        fi

        if [ -f "$target_dir/$binary_file" ]; then
            cp "$target_dir/$binary_file" "$BUILD_DIR/$platform/"
            print_info "✓ $binary_name built and copied for $platform"
        else
            print_error "Binary not found: $target_dir/$binary_file"
            return 1
        fi
    else
        print_error "Failed to build $binary_name for $platform"
        return 1
    fi
}

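# For reference, the command composed by build_binary_for_platform expands to roughly:
#   cargo build --release --target aarch64-apple-darwin --bin gemma_inference --package inference-engine --features bin
# for an inference binary, and to roughly:
#   cargo build --release --target x86_64-unknown-linux-gnu --bin predict-otron-9000 --package predict-otron-9000
# for a main binary (no extra features are enabled for any platform at present).
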
build_for_platform() {
    local platform="$1"
    print_header "Building for $platform"

    local failed_builds=()

    # Build main binaries
    for binary in "${MAIN_BINARIES[@]}"; do
        if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
            failed_builds+=("$binary")
        fi
    done

    # Build inference engine binaries with bin feature
    for binary in "${INFERENCE_BINARIES[@]}"; do
        if ! build_binary_for_platform "$binary" "$platform" "inference-engine" "--features bin"; then
            failed_builds+=("$binary")
        fi
    done

    # Build other workspace binaries
    for binary in "${OTHER_BINARIES[@]}"; do
        if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
            failed_builds+=("$binary")
        fi
    done

    if [ ${#failed_builds[@]} -eq 0 ]; then
        print_info "✓ All binaries built successfully for $platform"
    else
        print_warn "Some builds failed for $platform: ${failed_builds[*]}"
    fi
}

create_archives() {
    print_header "Creating Release Archives"

    cd "$BUILD_DIR"

    for platform in "${PLATFORMS[@]}"; do
        if [ -d "$platform" ] && [ -n "$(ls -A "$platform" 2>/dev/null)" ]; then
            local archive_name="predict-otron-9000-${platform}-${TIMESTAMP}"

            print_info "Creating archive for $platform"

            # Create platform-specific directory with all files
            mkdir -p "$archive_name"
            cp -r "$platform"/* "$archive_name/"

            # Add web assets to each platform archive
            if [ -d "web" ]; then
                mkdir -p "$archive_name/web"
                cp -r web/* "$archive_name/web/"
            fi

            # Create README for the platform
            cat > "$archive_name/README.txt" << EOF
Predict-Otron-9000 - Platform: $platform
Build Date: $(date)
========================================

Binaries included:
$(ls -1 "$platform")

Web Frontend:
- Located in the 'web' directory
- Serve with any static file server on port 8788 or configure your server

Usage:
1. Start the main server: ./predict-otron-9000
2. Start embeddings service: ./embeddings-engine
3. Access web interface at http://localhost:8080 (served by main server)

For more information, visit: https://github.com/geoffsee/predict-otron-9000
EOF

            # Create tar.gz archive
            tar -czf "${archive_name}.tar.gz" "$archive_name"
            rm -rf "$archive_name"

            print_info "✓ Created ${archive_name}.tar.gz"
        else
            print_warn "No binaries found for $platform, skipping archive"
        fi
    done

    cd "$PROJECT_ROOT"
}

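# For reference, archives produced above follow the naming pattern
#   predict-otron-9000-<platform>-<timestamp>.tar.gz
# e.g. predict-otron-9000-aarch64-apple-darwin-20250101_120000.tar.gz (timestamp illustrative),
# with the platform binaries at the top level, web/ assets, and a generated README.txt.
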
generate_build_report() {
    print_header "Build Report"

    echo "Build completed at: $(date)"
    echo "Build directory: $BUILD_DIR"
    echo ""
    echo "Archives created:"
    ls -la "$BUILD_DIR"/*.tar.gz 2>/dev/null || echo "No archives created"
    echo ""
    echo "Platform directories:"
    for platform in "${PLATFORMS[@]}"; do
        if [ -d "$BUILD_DIR/$platform" ]; then
            echo " $platform:"
            ls -la "$BUILD_DIR/$platform" | sed 's/^/ /'
        fi
    done

    if [ -d "$BUILD_DIR/web" ]; then
        echo ""
        echo "Web frontend assets:"
        ls -la "$BUILD_DIR/web" | head -10 | sed 's/^/ /'
        if [ $(ls -1 "$BUILD_DIR/web" | wc -l) -gt 10 ]; then
            echo " ... and $(( $(ls -1 "$BUILD_DIR/web" | wc -l) - 10 )) more files"
        fi
    fi
}

main() {
    print_header "Predict-Otron-9000 Cross-Platform Build Script"

    cd "$PROJECT_ROOT"

    check_dependencies
    install_targets
    create_build_dirs

    # Build Leptos web frontend first
    build_leptos_app

    # Build for each platform
    for platform in "${PLATFORMS[@]}"; do
        build_for_platform "$platform"
    done

    create_archives
    generate_build_report

    print_header "Build Complete!"
    print_info "All artifacts are available in: $BUILD_DIR"
}

# Handle command line arguments
case "${1:-}" in
    --help|-h)
        echo "Usage: $0 [options]"
        echo ""
        echo "Cross-platform build script for predict-otron-9000"
        echo ""
        echo "Options:"
        echo " --help, -h Show this help message"
        echo " --platforms Show supported platforms"
        echo " --clean Clean build directory before building"
        echo ""
        echo "Supported platforms:"
        for platform in "${PLATFORMS[@]}"; do
            echo " - $platform"
        done
        echo ""
        echo "Prerequisites:"
        echo " - Rust toolchain with rustup"
        echo " - cargo-leptos (will be installed if missing)"
        echo " - Platform-specific toolchains for cross-compilation"
        echo ""
        exit 0
        ;;
    --platforms)
        echo "Supported platforms:"
        for platform in "${PLATFORMS[@]}"; do
            echo " - $platform"
        done
        exit 0
        ;;
    --clean)
        print_info "Cleaning build directory..."
        rm -rf "$BUILD_DIR"
        print_info "Build directory cleaned"
        ;;
esac

main "$@"
@@ -1,19 +0,0 @@
#!/usr/bin/env sh
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

TEMP_DIR="$SCRIPT_DIR/temp"

mkdir -p "$TEMP_DIR"

cp "$SCRIPT_DIR/cli.ts" "$TEMP_DIR/cli.ts"
cp "$SCRIPT_DIR/../package.json" "$TEMP_DIR/package.json"

(
  cd "$TEMP_DIR"
  bun i
  bun build ./cli.ts --compile --outfile "$SCRIPT_DIR/cli"
)

rm -rf "$TEMP_DIR"
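# The compiled binary lands next to this script as "cli"; a typical invocation
# afterwards would be something like:
#   ./cli "What is the capital of France?"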
scripts/cli.ts
@@ -1,340 +0,0 @@
#!/usr/bin/env bun

import OpenAI from "openai";
import { parseArgs } from "util";

// =====================
// Config
// =====================
const DEFAULT_MODEL = "gemma-3-1b-it";
const DEFAULT_MAX_TOKENS = 256;

// Toggle this to reduce log overhead during timing runs
const PRINT_CHUNK_DEBUG = false;

// How many rows to show in the timing tables
const SHOW_FIRST_N = 3;
const SHOW_SLOWEST_N = 3;

// =====================
// Helpers
// =====================
const now = () => performance.now();

type ChunkStat = {
  index: number;
  tSinceRequestStartMs: number;
  dtSincePrevMs: number;
  contentChars: number;
};

function printHelp() {
  console.log(`
Usage: bun cli.ts [options] [prompt]

Simple CLI tool for testing the local OpenAI-compatible API server.

Options:
--model <model> Model to use (default: ${DEFAULT_MODEL})
--prompt <prompt> The prompt to send (can also be provided as positional argument)
--list-models List all available models from the server
--help Show this help message

Examples:
./cli.ts "What is the capital of France?"
./cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
./cli.ts --prompt "Who was the 16th president of the United States?"
./cli.ts --list-models

The server should be running at http://localhost:8080
Start it with: ./run_server.sh
`);
}

const { values, positionals } = parseArgs({
  args: process.argv.slice(2),
  options: {
    model: { type: "string" },
    prompt: { type: "string" },
    help: { type: "boolean" },
    "list-models": { type: "boolean" },
  },
  strict: false,
  allowPositionals: true,
});

async function requestLocalOpenAI(model: string, userPrompt: string) {
  const openai = new OpenAI({
    baseURL: "http://localhost:8080/v1",
    apiKey: "not used",
  });
  try {
    console.log("[DEBUG] Creating chat completion request...");
    return openai.chat.completions.create({
      model,
      max_tokens: DEFAULT_MAX_TOKENS,
      stream: true,
      messages: [
        {
          role: "system",
          content: "You are a helpful assistant who responds thoughtfully and concisely.",
        },
        { role: "user", content: userPrompt },
      ],
    });
  } catch (e: any) {
    console.error("[ERROR] Failed to connect to local OpenAI server:", e.message);
    console.error("[HINT] Make sure the server is running at http://localhost:8080");
    console.error("[HINT] Start it with: ./run_server.sh");
    throw e;
  }
}

async function listModels() {
  const openai = new OpenAI({
    baseURL: "http://localhost:8080/v1",
    apiKey: "not used",
  });
  try {
    const models = await openai.models.list();
    console.log(`[INFO] Available models from http://localhost:8080/v1:`);
    console.log("---");

    if (models.data && models.data.length > 0) {
      models.data.forEach((model, index) => {
        console.log(`${index + 1}. ${model.id}`);
        console.log(` Owner: ${model.owned_by}`);
        console.log(` Created: ${new Date(model.created * 1000).toISOString()}`);
        console.log("");
      });
      console.log(`Total: ${models.data.length} models available`);
    } else {
      console.log("No models found.");
    }
  } catch (e: any) {
    console.error("[ERROR] Failed to fetch models from local OpenAI server:", e.message);
    console.error("[HINT] Make sure the server is running at http://localhost:8080");
    console.error("[HINT] Start it with: ./run_server.sh");
    throw e;
  }
}

// =====================
// Timing math
// =====================
function median(nums: number[]) {
  if (nums.length === 0) return 0;
  const s = [...nums].sort((a, b) => a - b);
  const mid = Math.floor(s.length / 2);
  return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
}

function quantile(nums: number[], q: number) {
  if (nums.length === 0) return 0;
  const s = [...nums].sort((a, b) => a - b);
  const pos = (s.length - 1) * q;
  const base = Math.floor(pos);
  const rest = pos - base;
  return s[base + 1] !== undefined ? s[base] + rest * (s[base + 1] - s[base]) : s[base];
}

function ms(n: number) {
  return `${n.toFixed(1)} ms`;
}

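// Worked example with assumed values: for gaps = [12.0, 8.0, 30.0, 10.0],
// median(gaps) === 11.0, quantile(gaps, 0.95) === 27.3, and ms(27.3) === "27.3 ms".
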
// =====================
// Main
// =====================
async function main() {
  const tProgramStart = now();

  if (values.help) {
    printHelp();
    process.exit(0);
  }

  if (values["list-models"]) {
    try {
      await listModels();
      process.exit(0);
    } catch (error: any) {
      console.error("\n[ERROR] Failed to list models:", error.message);
      process.exit(1);
    }
  }

  const prompt = values.prompt ?? positionals[0];

  if (!prompt) {
    console.error("[ERROR] No prompt provided!");
    printHelp();
    process.exit(1);
  }

  const model = values.model || DEFAULT_MODEL;

  console.log(`[INFO] Using model: ${model}`);
  console.log(`[INFO] Prompt: ${prompt}`);
  console.log(`[INFO] Connecting to: http://localhost:8080/v1`);
  console.log("---");

  const tBeforeRequest = now();

  try {
    console.log("[DEBUG] Initiating request to OpenAI server...");
    const response = await requestLocalOpenAI(model, prompt);
    const tAfterCreate = now();

    // Streaming handling + timing
    let fullResponse = "";
    let chunkCount = 0;

    const chunkStats: ChunkStat[] = [];
    let tFirstChunk: number | null = null;
    let tPrevChunk: number | null = null;

    console.log("[INFO] Waiting for model to generate response...");
    let loadingInterval: ReturnType<typeof setInterval> | undefined;
    if (!PRINT_CHUNK_DEBUG) {
      // Show loading animation only if not in debug mode
      const loadingChars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
      let i = 0;
      process.stdout.write('\r[INFO] Thinking ');
      loadingInterval = setInterval(() => {
        process.stdout.write(`\r[INFO] Thinking ${loadingChars[i++ % loadingChars.length]} `);
      }, 80);
    } else {
      console.log("[DEBUG] Starting to receive streaming response...");
    }

    for await (const chunk of response) {
      // Clear loading animation on first chunk (and only once)
      if (loadingInterval) {
        clearInterval(loadingInterval);
        process.stdout.write('\r \r');
        loadingInterval = undefined;
      }
      const tNow = now();
      chunkCount++;

      // Extract content (delta) if present
      const content = chunk.choices?.[0]?.delta?.content ?? "";
      if (PRINT_CHUNK_DEBUG) {
        console.log(`[DEBUG] Received chunk #${chunkCount}:`, JSON.stringify(chunk));
        if (content) console.log(`[DEBUG] Chunk content: "${content}"`);
      }

      if (content) {
        process.stdout.write(content);
        fullResponse += content;
      }

      if (tFirstChunk === null) tFirstChunk = tNow;

      const dtSincePrev = tPrevChunk === null ? 0 : tNow - tPrevChunk;
      chunkStats.push({
        index: chunkCount,
        tSinceRequestStartMs: tNow - tBeforeRequest,
        dtSincePrevMs: dtSincePrev,
        contentChars: content.length,
      });

      tPrevChunk = tNow;
    }

    // =========
    // Summary
    // =========
    const tStreamEnd = now();
    const totalChars = fullResponse.length;

    console.log("\n---");
    console.log(`[DEBUG] Stream completed after ${chunkCount} chunks`);
    console.log(`[INFO] Response completed. Total length: ${totalChars} characters`);

    // Build timing metrics
    const ttfbMs = (tFirstChunk ?? tStreamEnd) - tAfterCreate; // time from create() resolved → first chunk
    const createOverheadMs = tAfterCreate - tBeforeRequest; // time spent awaiting create() promise
    const totalSinceRequestMs = tStreamEnd - tBeforeRequest; // from just before create() to last chunk
    const streamDurationMs =
      tFirstChunk === null ? 0 : tStreamEnd - tFirstChunk;

    const gaps = chunkStats
      .map((c) => c.dtSincePrevMs)
      // ignore the first "gap" which is 0 by construction
      .slice(1);

    const avgGapMs = gaps.length ? gaps.reduce((a, b) => a + b, 0) / gaps.length : 0;
    const medGapMs = median(gaps);
    const p95GapMs = quantile(gaps, 0.95);

    let maxGapMs = 0;
    let maxGapAtChunk = 0;
    for (let i = 0; i < gaps.length; i++) {
      if (gaps[i] > maxGapMs) {
        maxGapMs = gaps[i];
        maxGapAtChunk = i + 2; // +1 to move from 0-based, +1 because we sliced starting at second chunk
      }
    }

    // Pretty print summary
    console.log("\n=== Timing Summary ===");
    console.log(`create() await time: ${ms(createOverheadMs)}`);
    console.log(`TTFB (to 1st chunk): ${ms(ttfbMs)}`);
    console.log(`Stream duration: ${ms(streamDurationMs)}`);
    console.log(`End-to-end (req→last): ${ms(totalSinceRequestMs)}`);
    console.log(`Chunks: ${chunkCount}`);
    console.log(`Total content chars: ${totalChars}`);
    console.log(`Avg chars/chunk: ${(chunkCount ? totalChars / chunkCount : 0).toFixed(1)}`);
    console.log(`Inter-chunk gap (avg): ${ms(avgGapMs)}`);
    console.log(`Inter-chunk gap (median): ${ms(medGapMs)}`);
    console.log(`Inter-chunk gap (p95): ${ms(p95GapMs)}`);
    if (gaps.length > 0) {
      console.log(`Largest gap: ${ms(maxGapMs)} (before chunk #${maxGapAtChunk})`);
    }

    // Small tables: first N and slowest N gaps
    const firstRows = chunkStats.slice(0, SHOW_FIRST_N).map((c) => ({
      chunk: c.index,
      "t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
      "dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
      "chars": c.contentChars,
    }));

    const slowestRows = chunkStats
      .slice(1) // skip first (no meaningful gap)
      .sort((a, b) => b.dtSincePrevMs - a.dtSincePrevMs)
      .slice(0, SHOW_SLOWEST_N)
      .map((c) => ({
        chunk: c.index,
        "t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
        "dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
        "chars": c.contentChars,
      }));

    if (firstRows.length > 0) {
      console.log("\n--- First chunk timings ---");
      // @ts-ignore Bun/Node support console.table
      console.table(firstRows);
    }

    if (slowestRows.length > 0) {
      console.log(`\n--- Slowest ${SHOW_SLOWEST_N} gaps ---`);
      // @ts-ignore
      console.table(slowestRows);
    }

    const tProgramEnd = now();
    console.log("\n=== Program Overhead ===");
    console.log(`Total program runtime: ${ms(tProgramEnd - tProgramStart)}`);

  } catch (error: any) {
    console.error("\n[ERROR] Request failed:", error.message);
    process.exit(1);
  }
}

// Run the main function
main().catch((error) => {
  console.error("[FATAL ERROR]:", error);
  process.exit(1);
});