move cli into crates and stage for release

geoffsee
2025-08-31 13:23:50 -04:00
parent 9e9aa69769
commit 0580dc8c5e
26 changed files with 604 additions and 447 deletions


@@ -1,389 +0,0 @@
#!/bin/bash
# Cross-platform build script for predict-otron-9000
# Builds all workspace crates for common platforms
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUILD_DIR="${PROJECT_ROOT}/build"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
# Supported platforms
PLATFORMS=(
"x86_64-unknown-linux-gnu"
"x86_64-pc-windows-msvc"
"x86_64-apple-darwin"
"aarch64-apple-darwin"
"aarch64-unknown-linux-gnu"
)
# Main binaries to build
MAIN_BINARIES=(
"predict-otron-9000"
"embeddings-engine"
)
# Inference engine binaries (with bin feature)
INFERENCE_BINARIES=(
"gemma_inference"
"llama_inference"
)
# Other workspace binaries
OTHER_BINARIES=(
"helm-chart-tool"
)
print_header() {
echo -e "${BLUE}================================${NC}"
echo -e "${BLUE}$1${NC}"
echo -e "${BLUE}================================${NC}"
}
print_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}
print_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
check_dependencies() {
print_header "Checking Dependencies"
# Check rust
if ! command -v cargo >/dev/null 2>&1; then
print_error "Rust/Cargo is not installed"
exit 1
fi
# Check cargo-leptos for WASM frontend
if ! command -v cargo-leptos >/dev/null 2>&1; then
print_warn "cargo-leptos not found. Installing..."
cargo install cargo-leptos
fi
print_info "All dependencies available"
}
install_targets() {
print_header "Installing Rust Targets"
for platform in "${PLATFORMS[@]}"; do
print_info "Installing target: $platform"
rustup target add "$platform" || {
print_warn "Failed to install target $platform (may not be available on this host)"
}
done
# Add WASM target for leptos
print_info "Installing wasm32-unknown-unknown target for Leptos"
rustup target add wasm32-unknown-unknown
}
create_build_dirs() {
print_header "Setting up Build Directory"
rm -rf "$BUILD_DIR"
mkdir -p "$BUILD_DIR"
for platform in "${PLATFORMS[@]}"; do
mkdir -p "$BUILD_DIR/$platform"
done
mkdir -p "$BUILD_DIR/web"
print_info "Build directories created"
}
build_leptos_app() {
print_header "Building Leptos Web Frontend"
cd "$PROJECT_ROOT/crates/leptos-app"
# Build the WASM frontend
print_info "Building WASM frontend with cargo-leptos..."
cargo leptos build --release || {
print_error "Failed to build Leptos WASM frontend"
return 1
}
# Copy built assets to build directory
if [ -d "target/site" ]; then
cp -r target/site/* "$BUILD_DIR/web/"
print_info "Leptos frontend built and copied to $BUILD_DIR/web/"
else
print_error "Leptos build output not found at target/site"
return 1
fi
cd "$PROJECT_ROOT"
}
get_platform_features() {
local platform="$1"
local features=""
case "$platform" in
*-apple-darwin)
# macOS uses Metal but routes to CPU for Gemma stability
features=""
;;
*-unknown-linux-gnu|*-pc-windows-msvc)
# Linux and Windows can use CUDA if available
features=""
;;
*)
features=""
;;
esac
echo "$features"
}
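# Note: every arm above currently returns an empty feature list, so builds fall back to
# the default (CPU) features. As a hypothetical example, enabling CUDA for Linux would
# mean echoing "cuda" from the *-unknown-linux-gnu arm, which build_binary_for_platform
# below would then turn into an extra "--features cuda" flag on the cargo command line.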
build_binary_for_platform() {
local binary_name="$1"
local platform="$2"
local package_name="$3"
local additional_args="$4"
print_info "Building $binary_name for $platform"
local features=$(get_platform_features "$platform")
local feature_flag=""
if [ -n "$features" ]; then
feature_flag="--features $features"
fi
# Build command
local build_cmd="cargo build --release --target $platform --bin $binary_name"
if [ -n "$package_name" ]; then
build_cmd="$build_cmd --package $package_name"
fi
if [ -n "$additional_args" ]; then
build_cmd="$build_cmd $additional_args"
fi
if [ -n "$feature_flag" ]; then
build_cmd="$build_cmd $feature_flag"
fi
print_info "Running: $build_cmd"
if eval "$build_cmd"; then
# Copy binary to build directory
local target_dir="target/$platform/release"
local binary_file="$binary_name"
# Add .exe extension for Windows
if [[ "$platform" == *-pc-windows-msvc ]]; then
binary_file="$binary_name.exe"
fi
if [ -f "$target_dir/$binary_file" ]; then
cp "$target_dir/$binary_file" "$BUILD_DIR/$platform/"
print_info "$binary_name built and copied for $platform"
else
print_error "Binary not found: $target_dir/$binary_file"
return 1
fi
else
print_error "Failed to build $binary_name for $platform"
return 1
fi
}
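# For instance, building the main server for Apple Silicon assembles and runs a command
# equivalent to:
#   cargo build --release --target aarch64-apple-darwin --bin predict-otron-9000 --package predict-otron-9000
# and, on success, copies target/aarch64-apple-darwin/release/predict-otron-9000 into
# "$BUILD_DIR/aarch64-apple-darwin/".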
build_for_platform() {
local platform="$1"
print_header "Building for $platform"
local failed_builds=()
# Build main binaries
for binary in "${MAIN_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
failed_builds+=("$binary")
fi
done
# Build inference engine binaries with bin feature
for binary in "${INFERENCE_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "inference-engine" "--features bin"; then
failed_builds+=("$binary")
fi
done
# Build other workspace binaries
for binary in "${OTHER_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
failed_builds+=("$binary")
fi
done
if [ ${#failed_builds[@]} -eq 0 ]; then
print_info "✓ All binaries built successfully for $platform"
else
print_warn "Some builds failed for $platform: ${failed_builds[*]}"
fi
}
create_archives() {
print_header "Creating Release Archives"
cd "$BUILD_DIR"
for platform in "${PLATFORMS[@]}"; do
if [ -d "$platform" ] && [ -n "$(ls -A "$platform" 2>/dev/null)" ]; then
local archive_name="predict-otron-9000-${platform}-${TIMESTAMP}"
print_info "Creating archive for $platform"
# Create platform-specific directory with all files
mkdir -p "$archive_name"
cp -r "$platform"/* "$archive_name/"
# Add web assets to each platform archive
if [ -d "web" ]; then
mkdir -p "$archive_name/web"
cp -r web/* "$archive_name/web/"
fi
# Create README for the platform
cat > "$archive_name/README.txt" << EOF
Predict-Otron-9000 - Platform: $platform
Build Date: $(date)
========================================
Binaries included:
$(ls -1 "$platform")
Web Frontend:
- Located in the 'web' directory
- Serve it with any static file server (e.g., on port 8788), or configure your own server
Usage:
1. Start the main server: ./predict-otron-9000
2. Start embeddings service: ./embeddings-engine
3. Access web interface at http://localhost:8080 (served by main server)
For more information, visit: https://github.com/geoffsee/predict-otron-9000
EOF
# Create tar.gz archive
tar -czf "${archive_name}.tar.gz" "$archive_name"
rm -rf "$archive_name"
print_info "✓ Created ${archive_name}.tar.gz"
else
print_warn "No binaries found for $platform, skipping archive"
fi
done
cd "$PROJECT_ROOT"
}
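# Example (timestamp illustrative): a successful run produces per-platform archives such as
#   build/predict-otron-9000-aarch64-apple-darwin-20250831_132350.tar.gz
# Unpacking one with `tar -xzf <archive>.tar.gz` yields the platform binaries, a web/
# directory with the Leptos assets, and the generated README.txt shown above.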
generate_build_report() {
print_header "Build Report"
echo "Build completed at: $(date)"
echo "Build directory: $BUILD_DIR"
echo ""
echo "Archives created:"
ls -la "$BUILD_DIR"/*.tar.gz 2>/dev/null || echo "No archives created"
echo ""
echo "Platform directories:"
for platform in "${PLATFORMS[@]}"; do
if [ -d "$BUILD_DIR/$platform" ]; then
echo " $platform:"
ls -la "$BUILD_DIR/$platform" | sed 's/^/ /'
fi
done
if [ -d "$BUILD_DIR/web" ]; then
echo ""
echo "Web frontend assets:"
ls -la "$BUILD_DIR/web" | head -10 | sed 's/^/ /'
if [ $(ls -1 "$BUILD_DIR/web" | wc -l) -gt 10 ]; then
echo " ... and $(( $(ls -1 "$BUILD_DIR/web" | wc -l) - 10 )) more files"
fi
fi
}
main() {
print_header "Predict-Otron-9000 Cross-Platform Build Script"
cd "$PROJECT_ROOT"
check_dependencies
install_targets
create_build_dirs
# Build Leptos web frontend first
build_leptos_app
# Build for each platform
for platform in "${PLATFORMS[@]}"; do
build_for_platform "$platform"
done
create_archives
generate_build_report
print_header "Build Complete!"
print_info "All artifacts are available in: $BUILD_DIR"
}
# Handle command line arguments
case "${1:-}" in
--help|-h)
echo "Usage: $0 [options]"
echo ""
echo "Cross-platform build script for predict-otron-9000"
echo ""
echo "Options:"
echo " --help, -h Show this help message"
echo " --platforms Show supported platforms"
echo " --clean Clean build directory before building"
echo ""
echo "Supported platforms:"
for platform in "${PLATFORMS[@]}"; do
echo " - $platform"
done
echo ""
echo "Prerequisites:"
echo " - Rust toolchain with rustup"
echo " - cargo-leptos (will be installed if missing)"
echo " - Platform-specific toolchains for cross-compilation"
echo ""
exit 0
;;
--platforms)
echo "Supported platforms:"
for platform in "${PLATFORMS[@]}"; do
echo " - $platform"
done
exit 0
;;
--clean)
print_info "Cleaning build directory..."
rm -rf "$BUILD_DIR"
print_info "Build directory cleaned"
;;
esac
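# Typical invocations (script name assumed to be build_all.sh):
#   ./build_all.sh              # build every platform and package release archives
#   ./build_all.sh --platforms  # list the supported target triples and exit
#   ./build_all.sh --clean      # remove the build directory, then run a full build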
main "$@"


@@ -1,19 +0,0 @@
#!/usr/bin/env sh
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEMP_DIR="$SCRIPT_DIR/temp"
mkdir -p "$TEMP_DIR"
cp "$SCRIPT_DIR/cli.ts" "$TEMP_DIR/cli.ts"
cp "$SCRIPT_DIR/../package.json" "$TEMP_DIR/package.json"
(
cd "$TEMP_DIR"
bun i
bun build ./cli.ts --compile --outfile "$SCRIPT_DIR/cli"
)
rm -rf "$TEMP_DIR"
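# Once this script finishes, a self-contained executable named "cli" sits next to cli.ts.
# Typical usage afterwards (prompt illustrative):
#   ./cli --list-models
#   ./cli "What is the capital of France?"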


@@ -1,340 +0,0 @@
#!/usr/bin/env bun
import OpenAI from "openai";
import { parseArgs } from "util";
// =====================
// Config
// =====================
const DEFAULT_MODEL = "gemma-3-1b-it";
const DEFAULT_MAX_TOKENS = 256;
// Toggle this to reduce log overhead during timing runs
const PRINT_CHUNK_DEBUG = false;
// How many rows to show in the timing tables
const SHOW_FIRST_N = 3;
const SHOW_SLOWEST_N = 3;
// =====================
// Helpers
// =====================
const now = () => performance.now();
type ChunkStat = {
index: number;
tSinceRequestStartMs: number;
dtSincePrevMs: number;
contentChars: number;
};
function printHelp() {
console.log(`
Usage: bun cli.ts [options] [prompt]
Simple CLI tool for testing the local OpenAI-compatible API server.
Options:
--model <model> Model to use (default: ${DEFAULT_MODEL})
--prompt <prompt> The prompt to send (can also be provided as positional argument)
--list-models List all available models from the server
--help Show this help message
Examples:
./cli.ts "What is the capital of France?"
./cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
./cli.ts --prompt "Who was the 16th president of the United States?"
./cli.ts --list-models
The server should be running at http://localhost:8080
Start it with: ./run_server.sh
`);
}
const { values, positionals } = parseArgs({
args: process.argv.slice(2),
options: {
model: { type: "string" },
prompt: { type: "string" },
help: { type: "boolean" },
"list-models": { type: "boolean" },
},
strict: false,
allowPositionals: true,
});
async function requestLocalOpenAI(model: string, userPrompt: string) {
const openai = new OpenAI({
baseURL: "http://localhost:8080/v1",
apiKey: "not used",
});
try {
console.log("[DEBUG] Creating chat completion request...");
// Await here so connection failures are caught by the try/catch in this function
return await openai.chat.completions.create({
model,
max_tokens: DEFAULT_MAX_TOKENS,
stream: true,
messages: [
{
role: "system",
content: "You are a helpful assistant who responds thoughtfully and concisely.",
},
{ role: "user", content: userPrompt },
],
});
} catch (e: any) {
console.error("[ERROR] Failed to connect to local OpenAI server:", e.message);
console.error("[HINT] Make sure the server is running at http://localhost:8080");
console.error("[HINT] Start it with: ./run_server.sh");
throw e;
}
}
async function listModels() {
const openai = new OpenAI({
baseURL: "http://localhost:8080/v1",
apiKey: "not used",
});
try {
const models = await openai.models.list();
console.log(`[INFO] Available models from http://localhost:8080/v1:`);
console.log("---");
if (models.data && models.data.length > 0) {
models.data.forEach((model, index) => {
console.log(`${index + 1}. ${model.id}`);
console.log(` Owner: ${model.owned_by}`);
console.log(` Created: ${new Date(model.created * 1000).toISOString()}`);
console.log("");
});
console.log(`Total: ${models.data.length} models available`);
} else {
console.log("No models found.");
}
} catch (e: any) {
console.error("[ERROR] Failed to fetch models from local OpenAI server:", e.message);
console.error("[HINT] Make sure the server is running at http://localhost:8080");
console.error("[HINT] Start it with: ./run_server.sh");
throw e;
}
}
// =====================
// Timing math
// =====================
function median(nums: number[]) {
if (nums.length === 0) return 0;
const s = [...nums].sort((a, b) => a - b);
const mid = Math.floor(s.length / 2);
return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
}
function quantile(nums: number[], q: number) {
if (nums.length === 0) return 0;
const s = [...nums].sort((a, b) => a - b);
const pos = (s.length - 1) * q;
const base = Math.floor(pos);
const rest = pos - base;
return s[base + 1] !== undefined ? s[base] + rest * (s[base + 1] - s[base]) : s[base];
}
function ms(n: number) {
return `${n.toFixed(1)} ms`;
}
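// Worked examples for the helpers above (values chosen for illustration):
//   median([3, 1, 2])                 -> 2     (odd length: middle of the sorted array)
//   median([1, 2, 3, 4])              -> 2.5   (even length: mean of the two middle values)
//   quantile([10, 20, 30, 40], 0.95)  -> 38.5  (pos = 2.85, interpolates between 30 and 40)
//   ms(38.5)                          -> "38.5 ms"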
// =====================
// Main
// =====================
async function main() {
const tProgramStart = now();
if (values.help) {
printHelp();
process.exit(0);
}
if (values["list-models"]) {
try {
await listModels();
process.exit(0);
} catch (error: any) {
console.error("\n[ERROR] Failed to list models:", error.message);
process.exit(1);
}
}
const prompt = values.prompt ?? positionals[0];
if (!prompt) {
console.error("[ERROR] No prompt provided!");
printHelp();
process.exit(1);
}
const model = values.model || DEFAULT_MODEL;
console.log(`[INFO] Using model: ${model}`);
console.log(`[INFO] Prompt: ${prompt}`);
console.log(`[INFO] Connecting to: http://localhost:8080/v1`);
console.log("---");
const tBeforeRequest = now();
try {
console.log("[DEBUG] Initiating request to OpenAI server...");
const response = await requestLocalOpenAI(model, prompt);
const tAfterCreate = now();
// Streaming handling + timing
let fullResponse = "";
let chunkCount = 0;
const chunkStats: ChunkStat[] = [];
let tFirstChunk: number | null = null;
let tPrevChunk: number | null = null;
console.log("[INFO] Waiting for model to generate response...");
let loadingInterval: ReturnType<typeof setInterval> | undefined;
if (!PRINT_CHUNK_DEBUG) {
// Show loading animation only if not in debug mode
const loadingChars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let i = 0;
process.stdout.write('\r[INFO] Thinking ');
loadingInterval = setInterval(() => {
process.stdout.write(`\r[INFO] Thinking ${loadingChars[i++ % loadingChars.length]} `);
}, 80);
} else {
console.log("[DEBUG] Starting to receive streaming response...");
}
for await (const chunk of response) {
// Clear loading animation on first chunk
if (loadingInterval) {
clearInterval(loadingInterval);
loadingInterval = undefined;
process.stdout.write('\r \r');
}
const tNow = now();
chunkCount++;
// Extract content (delta) if present
const content = chunk.choices?.[0]?.delta?.content ?? "";
if (PRINT_CHUNK_DEBUG) {
console.log(`[DEBUG] Received chunk #${chunkCount}:`, JSON.stringify(chunk));
if (content) console.log(`[DEBUG] Chunk content: "${content}"`);
}
if (content) {
process.stdout.write(content);
fullResponse += content;
}
if (tFirstChunk === null) tFirstChunk = tNow;
const dtSincePrev = tPrevChunk === null ? 0 : tNow - tPrevChunk;
chunkStats.push({
index: chunkCount,
tSinceRequestStartMs: tNow - tBeforeRequest,
dtSincePrevMs: dtSincePrev,
contentChars: content.length,
});
tPrevChunk = tNow;
}
// =========
// Summary
// =========
const tStreamEnd = now();
const totalChars = fullResponse.length;
console.log("\n---");
console.log(`[DEBUG] Stream completed after ${chunkCount} chunks`);
console.log(`[INFO] Response completed. Total length: ${totalChars} characters`);
// Build timing metrics
const ttfbMs = (tFirstChunk ?? tStreamEnd) - tAfterCreate; // time from create() resolved → first chunk
const createOverheadMs = tAfterCreate - tBeforeRequest; // time spent awaiting create() promise
const totalSinceRequestMs = tStreamEnd - tBeforeRequest; // from just before create() to last chunk
const streamDurationMs =
tFirstChunk === null ? 0 : tStreamEnd - tFirstChunk;
const gaps = chunkStats
.map((c) => c.dtSincePrevMs)
// ignore the first "gap" which is 0 by construction
.slice(1);
const avgGapMs = gaps.length ? gaps.reduce((a, b) => a + b, 0) / gaps.length : 0;
const medGapMs = median(gaps);
const p95GapMs = quantile(gaps, 0.95);
let maxGapMs = 0;
let maxGapAtChunk = 0;
for (let i = 0; i < gaps.length; i++) {
if (gaps[i] > maxGapMs) {
maxGapMs = gaps[i];
maxGapAtChunk = i + 2; // +1 to move from 0-based, +1 because we sliced starting at second chunk
}
}
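// Illustrative numbers (not measured): if create() resolves 120 ms after the request and
// chunks arrive 300 ms, 350 ms, and 430 ms after it, then createOverheadMs = 120,
// ttfbMs = 300 - 120 = 180, gaps = [50, 80], avgGapMs = 65, and maxGapMs = 80,
// occurring before chunk #3.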
// Pretty print summary
console.log("\n=== Timing Summary ===");
console.log(`create() await time: ${ms(createOverheadMs)}`);
console.log(`TTFB (to 1st chunk): ${ms(ttfbMs)}`);
console.log(`Stream duration: ${ms(streamDurationMs)}`);
console.log(`End-to-end (req→last): ${ms(totalSinceRequestMs)}`);
console.log(`Chunks: ${chunkCount}`);
console.log(`Total content chars: ${totalChars}`);
console.log(`Avg chars/chunk: ${(chunkCount ? totalChars / chunkCount : 0).toFixed(1)}`);
console.log(`Inter-chunk gap (avg): ${ms(avgGapMs)}`);
console.log(`Inter-chunk gap (median): ${ms(medGapMs)}`);
console.log(`Inter-chunk gap (p95): ${ms(p95GapMs)}`);
if (gaps.length > 0) {
console.log(`Largest gap: ${ms(maxGapMs)} (before chunk #${maxGapAtChunk})`);
}
// Small tables: first N and slowest N gaps
const firstRows = chunkStats.slice(0, SHOW_FIRST_N).map((c) => ({
chunk: c.index,
"t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
"dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
"chars": c.contentChars,
}));
const slowestRows = chunkStats
.slice(1) // skip first (no meaningful gap)
.sort((a, b) => b.dtSincePrevMs - a.dtSincePrevMs)
.slice(0, SHOW_SLOWEST_N)
.map((c) => ({
chunk: c.index,
"t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
"dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
"chars": c.contentChars,
}));
if (firstRows.length > 0) {
console.log("\n--- First chunk timings ---");
// @ts-ignore Bun/Node support console.table
console.table(firstRows);
}
if (slowestRows.length > 0) {
console.log(`\n--- Slowest ${SHOW_SLOWEST_N} gaps ---`);
// @ts-ignore
console.table(slowestRows);
}
const tProgramEnd = now();
console.log("\n=== Program Overhead ===");
console.log(`Total program runtime: ${ms(tProgramEnd - tProgramStart)}`);
} catch (error: any) {
console.error("\n[ERROR] Request failed:", error.message);
process.exit(1);
}
}
// Run the main function
main().catch((error) => {
console.error("[FATAL ERROR]:", error);
process.exit(1);
});
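When the CLI runs against a local server, it prints the streamed response followed by the timing summary assembled from the console.log calls above. With illustrative values (actual numbers depend on the model, prompt, and hardware), the tail of a run looks roughly like:

=== Timing Summary ===
create() await time: 85.2 ms
TTFB (to 1st chunk): 412.7 ms
Stream duration: 1830.4 ms
End-to-end (req→last): 2328.3 ms
Chunks: 42
Total content chars: 315
Avg chars/chunk: 7.5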