move cli into crates and stage for release

This commit is contained in:
geoffsee
2025-08-31 13:23:50 -04:00
parent 9e9aa69769
commit 0580dc8c5e
26 changed files with 604 additions and 447 deletions

11
crates/cli/Cargo.toml Normal file

@@ -0,0 +1,11 @@
[package]
name = "cli"
version.workspace = true
edition = "2021"
build = "build.rs"
[[bin]]
name = "cli"
path = "src/main.rs"
[dependencies]

23
crates/cli/README.md Normal file

@@ -0,0 +1,23 @@
# cli
A Rust/TypeScript hybrid CLI.
```console
./cli [options] [prompt]
Simple CLI tool for testing the local OpenAI-compatible API server.
Options:
--model <model> Model to use (default: gemma-3-1b-it)
--prompt <prompt> The prompt to send (can also be provided as positional argument)
--list-models List all available models from the server
--help Show this help message
Examples:
./cli "What is the capital of France?"
./cli --model gemma-3-1b-it --prompt "Hello, world!"
./cli --prompt "Who was the 16th president of the United States?"
./cli --list-models
The server must be running at http://localhost:8080
```

209
crates/cli/build.rs Normal file

@@ -0,0 +1,209 @@
use std::env;
use std::fs;
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::{ChildStderr, ChildStdout, Command, Stdio};
use std::thread;
use std::time::{Duration, SystemTime};
mod bun_target;
use bun_target::BunTarget;
fn main() {
println!("cargo:rerun-if-changed=");
if let Err(e) = run_build() {
println!("cargo:warning=build.rs failed: {e}");
std::process::exit(1);
}
}
fn run_build() -> io::Result<()> {
let manifest_dir =
PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
let package_dir = manifest_dir.join("package");
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
let output_path = out_dir.join("client-cli");
let bun_tgt = BunTarget::from_cargo_env()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
// Optional: warn if using a Bun target that's marked unsupported in your chart
if matches!(bun_tgt, BunTarget::WindowsArm64) {
println!("cargo:warning=bun-windows-arm64 is marked unsupported in the compatibility chart");
}
warn(&format!("Building CLI into: {}", output_path.display()));
// --- bun install (in ./package), keep temps inside OUT_DIR ---
let mut install = Command::new("bun")
.current_dir(&package_dir)
.env("TMPDIR", &out_dir)
.arg("install")
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun install`: {e}")))?;
let install_join = stream_child("bun install", install.stdout.take(), install.stderr.take());
let install_status = install.wait()?;
// ensure streams finish
join_streams(install_join);
if !install_status.success() {
let code = install_status.code().unwrap_or(1);
return Err(io::Error::new(
io::ErrorKind::Other,
format!("bun install failed with status {code}"),
));
}
let _target = env::var("TARGET").unwrap(); // TARGET is not used below; the underscore avoids an unused-variable warning
// --- bun build (in ./package), emit to OUT_DIR, keep temps inside OUT_DIR ---
let mut build = Command::new("bun")
.current_dir(&package_dir)
.env("TMPDIR", &out_dir)
.arg("build")
.arg("./cli.ts")
.arg(format!("--target={}", bun_tgt.as_bun_flag()))
.arg("--compile")
.arg("--outfile")
.arg(&output_path)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun build`: {e}")))?;
let build_join = stream_child("bun build", build.stdout.take(), build.stderr.take());
let status = build.wait()?;
// ensure streams finish
join_streams(build_join);
if status.success() {
info("bun build succeeded");
} else {
let code = status.code().unwrap_or(1);
warn(&format!("bun build failed with status: {code}"));
return Err(io::Error::new(io::ErrorKind::Other, "bun build failed"));
}
// Ensure the output is executable (after it exists)
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let mut perms = fs::metadata(&output_path)?.permissions();
perms.set_mode(0o755);
fs::set_permissions(&output_path, perms)?;
}
println!("cargo:warning=Built CLI at {}", output_path.display());
println!("cargo:rustc-env=CLIENT_CLI_BIN={}", output_path.display());
// --- Cleanup stray .bun-build temp files (conservative: older than 5 minutes) ---
for dir in [&manifest_dir, &package_dir, &out_dir] {
if let Err(e) = remove_bun_temp_files(dir, Some(Duration::from_secs(5 * 60))) {
println!("cargo:warning=cleanup in {} failed: {e}", dir.display());
}
}
Ok(())
}
// Spawn readers for child's stdout/stderr so we don't deadlock on pipe buffers
fn stream_child(
tag: &str,
stdout: Option<ChildStdout>,
stderr: Option<ChildStderr>,
) -> (
Option<thread::JoinHandle<()>>,
Option<thread::JoinHandle<()>>,
) {
let t1 = stdout.map(|out| {
let tag = tag.to_string();
thread::spawn(move || {
let reader = io::BufReader::new(out);
for line in reader.lines() {
info(&format!("[{tag} stdout] {}", line.unwrap_or_default()));
}
})
});
let t2 = stderr.map(|err| {
let tag = tag.to_string();
thread::spawn(move || {
let reader = io::BufReader::new(err);
for line in reader.lines() {
warn(&format!("[{tag} stderr] {}", line.unwrap_or_default()));
}
})
});
(t1, t2)
}
fn join_streams(
joins: (
Option<thread::JoinHandle<()>>,
Option<thread::JoinHandle<()>>,
),
) {
if let Some(j) = joins.0 {
let _ = j.join();
}
if let Some(j) = joins.1 {
let _ = j.join();
}
}
fn remove_bun_temp_files(dir: &Path, older_than: Option<Duration>) -> io::Result<()> {
let now = SystemTime::now();
for entry in fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if !path.is_file() {
continue;
}
// Files like ".1860e7df40ff1bef-00000000.bun-build"
let name = entry.file_name();
let name = name.to_string_lossy();
let looks_like_bun_temp = name.starts_with('.') && name.ends_with(".bun-build");
if !looks_like_bun_temp {
continue;
}
if let Some(age) = older_than {
if let Ok(meta) = entry.metadata() {
if let Ok(modified) = meta.modified() {
if now.duration_since(modified).unwrap_or_default() < age {
// too new; skip to avoid racing an in-flight builder
continue;
}
}
}
}
match fs::remove_file(&path) {
Ok(_) => println!("cargo:warning=removed stray bun temp {}", path.display()),
Err(e) => println!("cargo:warning=failed to remove {}: {e}", path.display()),
}
}
Ok(())
}
fn warn(msg: &str) {
let _ = writeln!(io::stderr(), "[build.rs] {msg}");
println!("cargo:warning={msg}");
}
fn info(msg: &str) {
let _ = writeln!(io::stderr(), "[build.rs] {msg}");
println!("cargo:warning=INFO|{msg}");
}
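
The script prints `cargo:rerun-if-changed=` with an empty path; if the goal is to rebuild the embedded CLI whenever its TypeScript inputs change, the directives can name those inputs explicitly. A minimal sketch, assuming the file set below (paths relative to `crates/cli`, taken from this diff) is the right one to watch:

```rust
// Sketch only: emit one rerun-if-changed directive per input of `bun build`.
// The exact file list is an assumption; adjust it to match what actually feeds the bundle.
fn emit_rerun_directives() {
    for path in ["build.rs", "bun_target.rs", "package/cli.ts", "package/package.json"] {
        println!("cargo:rerun-if-changed={path}");
    }
}
```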

125
crates/cli/bun_target.rs Normal file

@@ -0,0 +1,125 @@
use std::env;
use std::fmt;
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum BunTarget {
LinuxX64Glibc,
LinuxArm64Glibc,
LinuxX64Musl,
LinuxArm64Musl,
WindowsX64,
WindowsArm64,
MacX64,
MacArm64,
}
impl BunTarget {
pub const fn as_bun_flag(self) -> &'static str {
match self {
BunTarget::LinuxX64Glibc => "bun-linux-x64",
BunTarget::LinuxArm64Glibc => "bun-linux-arm64",
BunTarget::LinuxX64Musl => "bun-linux-x64-musl",
BunTarget::LinuxArm64Musl => "bun-linux-arm64-musl",
BunTarget::WindowsX64 => "bun-windows-x64",
BunTarget::WindowsArm64 => "bun-windows-arm64",
BunTarget::MacX64 => "bun-darwin-x64",
BunTarget::MacArm64 => "bun-darwin-arm64",
}
}
pub const fn rust_triples(self) -> &'static [&'static str] {
match self {
BunTarget::LinuxX64Glibc => &["x86_64-unknown-linux-gnu", "x86_64-unknown-linux-gnu.2.17"],
BunTarget::LinuxArm64Glibc => &["aarch64-unknown-linux-gnu"],
BunTarget::LinuxX64Musl => &["x86_64-unknown-linux-musl"],
BunTarget::LinuxArm64Musl => &["aarch64-unknown-linux-musl"],
BunTarget::WindowsX64 => &["x86_64-pc-windows-msvc"],
BunTarget::WindowsArm64 => &["aarch64-pc-windows-msvc"], // chart says unsupported; still map
BunTarget::MacX64 => &["x86_64-apple-darwin"],
BunTarget::MacArm64 => &["aarch64-apple-darwin"],
}
}
pub fn from_rust_target(triple: &str) -> Option<Self> {
let norm = triple.trim();
if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
return Some(BunTarget::LinuxX64Glibc);
}
if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
return Some(BunTarget::LinuxArm64Glibc);
}
if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("musl") {
return Some(BunTarget::LinuxX64Musl);
}
if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("musl") {
return Some(BunTarget::LinuxArm64Musl);
}
if norm == "x86_64-pc-windows-msvc" {
return Some(BunTarget::WindowsX64);
}
if norm == "aarch64-pc-windows-msvc" {
return Some(BunTarget::WindowsArm64);
}
if norm == "x86_64-apple-darwin" {
return Some(BunTarget::MacX64);
}
if norm == "aarch64-apple-darwin" {
return Some(BunTarget::MacArm64);
}
for bt in [
BunTarget::LinuxX64Glibc,
BunTarget::LinuxArm64Glibc,
BunTarget::LinuxX64Musl,
BunTarget::LinuxArm64Musl,
BunTarget::WindowsX64,
BunTarget::WindowsArm64,
BunTarget::MacX64,
BunTarget::MacArm64,
] {
for &t in bt.rust_triples() {
if t == norm {
return Some(bt);
}
}
}
None
}
pub fn from_cargo_env() -> Result<Self, BunTargetError> {
if let Ok(triple) = env::var("TARGET") {
if let Some(bt) = Self::from_rust_target(&triple) {
return Ok(bt);
}
return Err(BunTargetError::UnknownTriple(triple));
}
let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
let envv = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
let vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_else(|_| "unknown".into());
let triple = format!("{}-{}-{}-{}", arch, vendor, os, if envv.is_empty() { "gnu" } else { &envv });
if let Some(bt) = Self::from_rust_target(&triple) {
Ok(bt)
} else {
Err(BunTargetError::UnknownTriple(triple))
}
}
}
#[derive(Debug)]
pub enum BunTargetError {
UnknownTriple(String),
}
impl fmt::Display for BunTargetError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
BunTargetError::UnknownTriple(t) => write!(f, "unrecognized Rust target triple: {t}"),
}
}
}
impl std::error::Error for BunTargetError {}
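
`from_rust_target` is the part most likely to drift as new triples appear, so a small regression check over the mappings above is cheap to keep. A hedged sketch follows; note that because `bun_target.rs` is pulled in via `mod bun_target;` from build.rs, these tests would only run if the module is also reachable from a normal test target.

```rust
// Illustrative tests for the triple -> Bun flag mapping; the cases mirror the match arms above.
#[cfg(test)]
mod tests {
    use super::BunTarget;

    #[test]
    fn maps_common_triples_to_bun_flags() {
        let cases = [
            ("x86_64-unknown-linux-gnu", "bun-linux-x64"),
            ("aarch64-unknown-linux-musl", "bun-linux-arm64-musl"),
            ("x86_64-pc-windows-msvc", "bun-windows-x64"),
            ("aarch64-apple-darwin", "bun-darwin-arm64"),
        ];
        for (triple, flag) in cases {
            let bt = BunTarget::from_rust_target(triple).expect("triple should be recognized");
            assert_eq!(bt.as_bun_flag(), flag);
        }
    }

    #[test]
    fn rejects_unknown_triples() {
        assert!(BunTarget::from_rust_target("wasm32-unknown-unknown").is_none());
    }
}
```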

339
crates/cli/package/cli.ts Executable file

@@ -0,0 +1,339 @@
#!/usr/bin/env bun
import OpenAI from "openai";
import { parseArgs } from "util";
// =====================
// Config
// =====================
const DEFAULT_MODEL = "gemma-3-1b-it";
const DEFAULT_MAX_TOKENS = 256;
// Toggle this to reduce log overhead during timing runs
const PRINT_CHUNK_DEBUG = false;
// How many rows to show in the timing tables
const SHOW_FIRST_N = 3;
const SHOW_SLOWEST_N = 3;
// =====================
// Helpers
// =====================
const now = () => performance.now();
type ChunkStat = {
index: number;
tSinceRequestStartMs: number;
dtSincePrevMs: number;
contentChars: number;
};
function printHelp() {
console.log(`
./cli [options] [prompt]
Simple CLI tool for testing the local OpenAI-compatible API server.
Options:
--model <model> Model to use (default: gemma-3-1b-it)
--prompt <prompt> The prompt to send (can also be provided as positional argument)
--list-models List all available models from the server
--help Show this help message
Examples:
./cli "What is the capital of France?"
./cli --model gemma-3-1b-it --prompt "Hello, world!"
./cli --prompt "Who was the 16th president of the United States?"
./cli --list-models
The server must be running at http://localhost:8080
`);
}
const { values, positionals } = parseArgs({
args: process.argv.slice(2),
options: {
model: { type: "string" },
prompt: { type: "string" },
help: { type: "boolean" },
"list-models": { type: "boolean" },
},
strict: false,
allowPositionals: true,
});
async function requestLocalOpenAI(model: string, userPrompt: string) {
const openai = new OpenAI({
baseURL: "http://localhost:8080/v1",
apiKey: "not used",
});
try {
console.log("[DEBUG] Creating chat completion request...");
return openai.chat.completions.create({
model,
max_tokens: DEFAULT_MAX_TOKENS,
stream: true,
messages: [
{
role: "system",
content: "You are a helpful assistant who responds thoughtfully and concisely.",
},
{ role: "user", content: userPrompt },
],
});
} catch (e: any) {
console.error("[ERROR] Failed to connect to local OpenAI server:", e.message);
console.error("[HINT] Make sure the server is running at http://localhost:8080");
console.error("[HINT] Start it with: ./run_server.sh");
throw e;
}
}
async function listModels() {
const openai = new OpenAI({
baseURL: "http://localhost:8080/v1",
apiKey: "not used",
});
try {
const models = await openai.models.list();
console.log(`[INFO] Available models from http://localhost:8080/v1:`);
console.log("---");
if (models.data && models.data.length > 0) {
models.data.forEach((model, index) => {
console.log(`${index + 1}. ${model.id}`);
console.log(` Owner: ${model.owned_by}`);
console.log(` Created: ${new Date(model.created * 1000).toISOString()}`);
console.log("");
});
console.log(`Total: ${models.data.length} models available`);
} else {
console.log("No models found.");
}
} catch (e: any) {
console.error("[ERROR] Failed to fetch models from local OpenAI server:", e.message);
console.error("[HINT] Make sure the server is running at http://localhost:8080");
console.error("[HINT] Start it with: ./run_server.sh");
throw e;
}
}
// =====================
// Timing math
// =====================
function median(nums: number[]) {
if (nums.length === 0) return 0;
const s = [...nums].sort((a, b) => a - b);
const mid = Math.floor(s.length / 2);
return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
}
function quantile(nums: number[], q: number) {
if (nums.length === 0) return 0;
const s = [...nums].sort((a, b) => a - b);
const pos = (s.length - 1) * q;
const base = Math.floor(pos);
const rest = pos - base;
return s[base + 1] !== undefined ? s[base] + rest * (s[base + 1] - s[base]) : s[base];
}
function ms(n: number) {
return `${n.toFixed(1)} ms`;
}
// =====================
// Main
// =====================
async function main() {
const tProgramStart = now();
if (values.help) {
printHelp();
process.exit(0);
}
if (values["list-models"]) {
try {
await listModels();
process.exit(0);
} catch (error: any) {
console.error("\n[ERROR] Failed to list models:", error.message);
process.exit(1);
}
}
const prompt = values.prompt ?? positionals[0];
if (!prompt) {
console.error("[ERROR] No prompt provided!");
printHelp();
process.exit(1);
}
const model = values.model || DEFAULT_MODEL;
console.log(`[INFO] Using model: ${model}`);
console.log(`[INFO] Prompt: ${prompt}`);
console.log(`[INFO] Connecting to: http://localhost:8080/v1`);
console.log("---");
const tBeforeRequest = now();
try {
console.log("[DEBUG] Initiating request to OpenAI server...");
const response = await requestLocalOpenAI(model, prompt);
const tAfterCreate = now();
// Streaming handling + timing
let fullResponse = "";
let chunkCount = 0;
const chunkStats: ChunkStat[] = [];
let tFirstChunk: number | null = null;
let tPrevChunk: number | null = null;
console.log("[INFO] Waiting for model to generate response...");
let loadingInterval: ReturnType<typeof setInterval> | undefined;
if (!PRINT_CHUNK_DEBUG) {
// Show loading animation only if not in debug mode
const loadingChars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let i = 0;
process.stdout.write('\r[INFO] Thinking ');
loadingInterval = setInterval(() => {
process.stdout.write(`\r[INFO] Thinking ${loadingChars[i++ % loadingChars.length]} `);
}, 80);
} else {
console.log("[DEBUG] Starting to receive streaming response...");
}
for await (const chunk of response) {
// Clear the loading animation once, on the first chunk only, so later writes don't overwrite streamed output
if (loadingInterval) {
clearInterval(loadingInterval);
loadingInterval = undefined;
process.stdout.write('\r \r');
}
const tNow = now();
chunkCount++;
// Extract content (delta) if present
const content = chunk.choices?.[0]?.delta?.content ?? "";
if (PRINT_CHUNK_DEBUG) {
console.log(`[DEBUG] Received chunk #${chunkCount}:`, JSON.stringify(chunk));
if (content) console.log(`[DEBUG] Chunk content: "${content}"`);
}
if (content) {
process.stdout.write(content);
fullResponse += content;
}
if (tFirstChunk === null) tFirstChunk = tNow;
const dtSincePrev = tPrevChunk === null ? 0 : tNow - tPrevChunk;
chunkStats.push({
index: chunkCount,
tSinceRequestStartMs: tNow - tBeforeRequest,
dtSincePrevMs: dtSincePrev,
contentChars: content.length,
});
tPrevChunk = tNow;
}
// =========
// Summary
// =========
const tStreamEnd = now();
const totalChars = fullResponse.length;
console.log("\n---");
console.log(`[DEBUG] Stream completed after ${chunkCount} chunks`);
console.log(`[INFO] Response completed. Total length: ${totalChars} characters`);
// Build timing metrics
const ttfbMs = (tFirstChunk ?? tStreamEnd) - tAfterCreate; // time from create() resolved → first chunk
const createOverheadMs = tAfterCreate - tBeforeRequest; // time spent awaiting create() promise
const totalSinceRequestMs = tStreamEnd - tBeforeRequest; // from just before create() to last chunk
const streamDurationMs =
tFirstChunk === null ? 0 : tStreamEnd - tFirstChunk;
const gaps = chunkStats
.map((c) => c.dtSincePrevMs)
// ignore the first "gap" which is 0 by construction
.slice(1);
const avgGapMs = gaps.length ? gaps.reduce((a, b) => a + b, 0) / gaps.length : 0;
const medGapMs = median(gaps);
const p95GapMs = quantile(gaps, 0.95);
let maxGapMs = 0;
let maxGapAtChunk = 0;
for (let i = 0; i < gaps.length; i++) {
if (gaps[i] > maxGapMs) {
maxGapMs = gaps[i];
maxGapAtChunk = i + 2; // +1 to move from 0-based, +1 because we sliced starting at second chunk
}
}
// Pretty print summary
console.log("\n=== Timing Summary ===");
console.log(`create() await time: ${ms(createOverheadMs)}`);
console.log(`TTFB (to 1st chunk): ${ms(ttfbMs)}`);
console.log(`Stream duration: ${ms(streamDurationMs)}`);
console.log(`End-to-end (req→last): ${ms(totalSinceRequestMs)}`);
console.log(`Chunks: ${chunkCount}`);
console.log(`Total content chars: ${totalChars}`);
console.log(`Avg chars/chunk: ${(chunkCount ? totalChars / chunkCount : 0).toFixed(1)}`);
console.log(`Inter-chunk gap (avg): ${ms(avgGapMs)}`);
console.log(`Inter-chunk gap (median): ${ms(medGapMs)}`);
console.log(`Inter-chunk gap (p95): ${ms(p95GapMs)}`);
if (gaps.length > 0) {
console.log(`Largest gap: ${ms(maxGapMs)} (before chunk #${maxGapAtChunk})`);
}
// Small tables: first N and slowest N gaps
const firstRows = chunkStats.slice(0, SHOW_FIRST_N).map((c) => ({
chunk: c.index,
"t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
"dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
"chars": c.contentChars,
}));
const slowestRows = chunkStats
.slice(1) // skip first (no meaningful gap)
.sort((a, b) => b.dtSincePrevMs - a.dtSincePrevMs)
.slice(0, SHOW_SLOWEST_N)
.map((c) => ({
chunk: c.index,
"t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
"dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
"chars": c.contentChars,
}));
if (firstRows.length > 0) {
console.log("\n--- First chunk timings ---");
// @ts-ignore Bun/Node support console.table
console.table(firstRows);
}
if (slowestRows.length > 0) {
console.log(`\n--- Slowest ${SHOW_SLOWEST_N} gaps ---`);
// @ts-ignore
console.table(slowestRows);
}
const tProgramEnd = now();
console.log("\n=== Program Overhead ===");
console.log(`Total program runtime: ${ms(tProgramEnd - tProgramStart)}`);
} catch (error: any) {
console.error("\n[ERROR] Request failed:", error.message);
process.exit(1);
}
}
// Run the main function
main().catch((error) => {
console.error("[FATAL ERROR]:", error);
process.exit(1);
});
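
The timing summary hinges on the `median` and `quantile` helpers, where `quantile` interpolates linearly between the two neighboring order statistics. The same math in Rust, for reference (a minimal sketch, not part of this commit):

```rust
// Rust mirror of the median / interpolated-quantile helpers in cli.ts (sketch only).
// quantile(gaps, 0.95) would give the p95 inter-chunk gap used in the summary above.
fn median(mut xs: Vec<f64>) -> f64 {
    if xs.is_empty() {
        return 0.0;
    }
    xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let mid = xs.len() / 2;
    if xs.len() % 2 == 1 {
        xs[mid]
    } else {
        (xs[mid - 1] + xs[mid]) / 2.0
    }
}

fn quantile(mut xs: Vec<f64>, q: f64) -> f64 {
    if xs.is_empty() {
        return 0.0;
    }
    xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let pos = (xs.len() - 1) as f64 * q;
    let base = pos.floor() as usize;
    let rest = pos - base as f64;
    match xs.get(base + 1) {
        Some(next) => xs[base] + rest * (next - xs[base]),
        None => xs[base],
    }
}
```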

11
crates/cli/package/package.json Normal file

@@ -0,0 +1,11 @@
{
"name": "cli",
"main": "cli.ts",
"scripts": {
"build": "bun build cli.ts --compile --outfile cli"
},
"dependencies": {
"install": "^0.13.0",
"openai": "^5.16.0"
}
}

32
crates/cli/src/main.rs Normal file

@@ -0,0 +1,32 @@
use std::{env, fs, io, path::PathBuf, process::Command};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
fn main() -> io::Result<()> {
// Absolute path provided by build.rs at compile time.
// `include_bytes!` accepts string literals; `env!` expands to a literal at compile time.
const CLIENT_CLI: &[u8] = include_bytes!(env!("CLIENT_CLI_BIN"));
// Write to a temp file
let mut tmp = env::temp_dir();
tmp.push("client-cli-embedded");
fs::write(&tmp, CLIENT_CLI)?;
// Ensure it's executable on Unix
#[cfg(unix)]
{
let mut perms = fs::metadata(&tmp)?.permissions();
perms.set_mode(0o755);
fs::set_permissions(&tmp, perms)?;
}
// Run it
let status = Command::new(&tmp).arg("--version").status()?;
if !status.success() {
return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
}
Ok(())
}
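
As committed, the wrapper always runs the embedded CLI with `--version`, which reads like a placeholder for the release wiring. If the intent is for the Rust binary to act as a transparent front end, it would forward its own arguments and exit code; a minimal sketch under that assumption, reusing the temp-file extraction above:

```rust
// Sketch: run the extracted CLI with the wrapper's own arguments and propagate its exit code.
// Assumes `tmp` already points at the executable written out by the code above.
use std::{env, io, path::Path, process::Command};

fn run_embedded(tmp: &Path) -> io::Result<i32> {
    let status = Command::new(tmp)
        .args(env::args_os().skip(1)) // everything after the wrapper's own program name
        .status()?;
    Ok(status.code().unwrap_or(1))
}
```

The caller would then pass the returned code to `std::process::exit` instead of mapping failure to an `io::Error`.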