Mirror of https://github.com/geoffsee/predict-otron-9001.git, synced 2025-09-08 22:46:44 +00:00

Commit: move cli into crates and stage for release
crates/cli/Cargo.toml (Normal file, 11 lines)
@@ -0,0 +1,11 @@
[package]
name = "cli"
version.workspace = true
edition = "2021"
build = "build.rs"

[[bin]]
name = "cli"
path = "src/main.rs"

[dependencies]
crates/cli/README.md (Normal file, 23 lines)
@@ -0,0 +1,23 @@
# cli

A Rust/TypeScript hybrid

```console
./cli [options] [prompt]

Simple CLI tool for testing the local OpenAI-compatible API server.

Options:
  --model <model>     Model to use (default: gemma-3-1b-it)
  --prompt <prompt>   The prompt to send (can also be provided as positional argument)
  --list-models       List all available models from the server
  --help              Show this help message

Examples:
  ./cli "What is the capital of France?"
  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
  ./cli --prompt "Who was the 16th president of the United States?"
  ./cli --list-models

The server must be running at http://localhost:8080
```
crates/cli/build.rs (Normal file, 209 lines)
@@ -0,0 +1,209 @@
use std::env;
use std::fs;
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::{ChildStderr, ChildStdout, Command, Stdio};
use std::thread;
use std::time::{Duration, SystemTime};

mod bun_target;
use bun_target::BunTarget;

fn main() {
    // NOTE: an empty rerun-if-changed path never matches an existing file,
    // which appears to make Cargo re-run this script on every build.
    println!("cargo:rerun-if-changed=");

    if let Err(e) = run_build() {
        println!("cargo:warning=build.rs failed: {e}");
        std::process::exit(1);
    }
}

fn run_build() -> io::Result<()> {
    let manifest_dir =
        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
    let package_dir = manifest_dir.join("package");
    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
    let output_path = out_dir.join("client-cli");

    let bun_tgt = BunTarget::from_cargo_env()
        .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

    // Optional: warn if using a Bun target that's marked unsupported in the compatibility chart
    if matches!(bun_tgt, BunTarget::WindowsArm64) {
        println!("cargo:warning=bun-windows-arm64 is marked unsupported in the compatibility chart");
    }

    warn(&format!("Building CLI into: {}", output_path.display()));

    // --- bun install (in ./package), keep temps inside OUT_DIR ---
    let mut install = Command::new("bun")
        .current_dir(&package_dir)
        .env("TMPDIR", &out_dir)
        .arg("install")
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun install`: {e}")))?;

    let install_join = stream_child("bun install", install.stdout.take(), install.stderr.take());
    let install_status = install.wait()?;
    // ensure streams finish
    join_streams(install_join);

    if !install_status.success() {
        let code = install_status.code().unwrap_or(1);
        return Err(io::Error::new(
            io::ErrorKind::Other,
            format!("bun install failed with status {code}"),
        ));
    }

    // (currently unused; the Bun flag above is derived from the same TARGET)
    let target = env::var("TARGET").unwrap();

    // --- bun build (in ./package), emit to OUT_DIR, keep temps inside OUT_DIR ---
    let mut build = Command::new("bun")
        .current_dir(&package_dir)
        .env("TMPDIR", &out_dir)
        .arg("build")
        .arg("./cli.ts")
        .arg(format!("--target={}", bun_tgt.as_bun_flag()))
        .arg("--compile")
        .arg("--outfile")
        .arg(&output_path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun build`: {e}")))?;

    let build_join = stream_child("bun build", build.stdout.take(), build.stderr.take());
    let status = build.wait()?;
    // ensure streams finish
    join_streams(build_join);

    if status.success() {
        info("bun build succeeded");
    } else {
        let code = status.code().unwrap_or(1);
        warn(&format!("bun build failed with status: {code}"));
        return Err(io::Error::new(io::ErrorKind::Other, "bun build failed"));
    }

    // Ensure the output is executable (after it exists)
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(&output_path)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&output_path, perms)?;
    }

    println!("cargo:warning=Built CLI at {}", output_path.display());
    println!("cargo:rustc-env=CLIENT_CLI_BIN={}", output_path.display());

    // --- Cleanup stray .bun-build temp files (conservative: older than 5 minutes) ---
    for dir in [&manifest_dir, &package_dir, &out_dir] {
        if let Err(e) = remove_bun_temp_files(dir, Some(Duration::from_secs(5 * 60))) {
            println!("cargo:warning=cleanup in {} failed: {e}", dir.display());
        }
    }

    Ok(())
}

// Spawn readers for the child's stdout/stderr so we don't deadlock on pipe buffers
fn stream_child(
    tag: &str,
    stdout: Option<ChildStdout>,
    stderr: Option<ChildStderr>,
) -> (
    Option<thread::JoinHandle<()>>,
    Option<thread::JoinHandle<()>>,
) {
    let t1 = stdout.map(|out| {
        let tag = tag.to_string();
        thread::spawn(move || {
            let reader = io::BufReader::new(out);
            for line in reader.lines() {
                info(&format!("[{tag} stdout] {}", line.unwrap_or_default()));
            }
        })
    });
    let t2 = stderr.map(|err| {
        let tag = tag.to_string();
        thread::spawn(move || {
            let reader = io::BufReader::new(err);
            for line in reader.lines() {
                warn(&format!("[{tag} stderr] {}", line.unwrap_or_default()));
            }
        })
    });
    (t1, t2)
}

fn join_streams(
    joins: (
        Option<thread::JoinHandle<()>>,
        Option<thread::JoinHandle<()>>,
    ),
) {
    if let Some(j) = joins.0 {
        let _ = j.join();
    }
    if let Some(j) = joins.1 {
        let _ = j.join();
    }
}

fn remove_bun_temp_files(dir: &Path, older_than: Option<Duration>) -> io::Result<()> {
    let now = SystemTime::now();
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();
        if !path.is_file() {
            continue;
        }

        // Files like ".1860e7df40ff1bef-00000000.bun-build"
        let name = entry.file_name();
        let name = name.to_string_lossy();
        let looks_like_bun_temp = name.starts_with('.') && name.ends_with(".bun-build");

        if !looks_like_bun_temp {
            continue;
        }

        if let Some(age) = older_than {
            if let Ok(meta) = entry.metadata() {
                if let Ok(modified) = meta.modified() {
                    if now.duration_since(modified).unwrap_or_default() < age {
                        // too new; skip to avoid racing an in-flight builder
                        continue;
                    }
                }
            }
        }

        match fs::remove_file(&path) {
            Ok(_) => println!("cargo:warning=removed stray bun temp {}", path.display()),
            Err(e) => println!("cargo:warning=failed to remove {}: {e}", path.display()),
        }
    }
    Ok(())
}

fn warn(msg: &str) {
    let _ = writeln!(io::stderr(), "[build.rs] {msg}");
    println!("cargo:warning={msg}");
}

fn info(msg: &str) {
    let _ = writeln!(io::stderr(), "[build.rs] {msg}");
    println!("cargo:warning=INFO|{msg}");
}
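A note on failure modes: both spawns assume `bun` is on PATH, and if it is not, the build dies with a bare OS-level "No such file or directory". A minimal preflight sketch that `run_build()` could call first, assuming nothing beyond std (hypothetical, not part of this commit):

```rust
// Hypothetical preflight: fail early with a clear message when `bun` is missing.
fn ensure_bun_available() -> std::io::Result<()> {
    use std::process::{Command, Stdio};

    // `bun --version` is cheap and exits 0 when the toolchain is usable.
    let status = Command::new("bun")
        .arg("--version")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .map_err(|e| {
            std::io::Error::new(
                e.kind(),
                "`bun` was not found on PATH; install it from https://bun.sh before building the cli crate",
            )
        })?;
    if !status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "`bun --version` exited with a failure status",
        ));
    }
    Ok(())
}
```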
crates/cli/bun_target.rs (Normal file, 125 lines)
@@ -0,0 +1,125 @@
use std::env;
use std::fmt;

#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum BunTarget {
    LinuxX64Glibc,
    LinuxArm64Glibc,
    LinuxX64Musl,
    LinuxArm64Musl,
    WindowsX64,
    WindowsArm64,
    MacX64,
    MacArm64,
}

impl BunTarget {
    pub const fn as_bun_flag(self) -> &'static str {
        match self {
            BunTarget::LinuxX64Glibc => "bun-linux-x64",
            BunTarget::LinuxArm64Glibc => "bun-linux-arm64",
            BunTarget::LinuxX64Musl => "bun-linux-x64-musl",
            BunTarget::LinuxArm64Musl => "bun-linux-arm64-musl",
            BunTarget::WindowsX64 => "bun-windows-x64",
            BunTarget::WindowsArm64 => "bun-windows-arm64",
            BunTarget::MacX64 => "bun-darwin-x64",
            BunTarget::MacArm64 => "bun-darwin-arm64",
        }
    }

    pub const fn rust_triples(self) -> &'static [&'static str] {
        match self {
            BunTarget::LinuxX64Glibc => {
                &["x86_64-unknown-linux-gnu", "x86_64-unknown-linux-gnu.2.17"]
            }
            BunTarget::LinuxArm64Glibc => &["aarch64-unknown-linux-gnu"],
            BunTarget::LinuxX64Musl => &["x86_64-unknown-linux-musl"],
            BunTarget::LinuxArm64Musl => &["aarch64-unknown-linux-musl"],
            BunTarget::WindowsX64 => &["x86_64-pc-windows-msvc"],
            BunTarget::WindowsArm64 => &["aarch64-pc-windows-msvc"], // chart says unsupported; still map
            BunTarget::MacX64 => &["x86_64-apple-darwin"],
            BunTarget::MacArm64 => &["aarch64-apple-darwin"],
        }
    }

    pub fn from_rust_target(triple: &str) -> Option<Self> {
        let norm = triple.trim();
        if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
            return Some(BunTarget::LinuxX64Glibc);
        }
        if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
            return Some(BunTarget::LinuxArm64Glibc);
        }
        if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("musl") {
            return Some(BunTarget::LinuxX64Musl);
        }
        if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("musl") {
            return Some(BunTarget::LinuxArm64Musl);
        }
        if norm == "x86_64-pc-windows-msvc" {
            return Some(BunTarget::WindowsX64);
        }
        if norm == "aarch64-pc-windows-msvc" {
            return Some(BunTarget::WindowsArm64);
        }
        if norm == "x86_64-apple-darwin" {
            return Some(BunTarget::MacX64);
        }
        if norm == "aarch64-apple-darwin" {
            return Some(BunTarget::MacArm64);
        }
        // Fall back to the explicit triple lists above.
        for bt in [
            BunTarget::LinuxX64Glibc,
            BunTarget::LinuxArm64Glibc,
            BunTarget::LinuxX64Musl,
            BunTarget::LinuxArm64Musl,
            BunTarget::WindowsX64,
            BunTarget::WindowsArm64,
            BunTarget::MacX64,
            BunTarget::MacArm64,
        ] {
            for &t in bt.rust_triples() {
                if t == norm {
                    return Some(bt);
                }
            }
        }
        None
    }

    pub fn from_cargo_env() -> Result<Self, BunTargetError> {
        if let Ok(triple) = env::var("TARGET") {
            if let Some(bt) = Self::from_rust_target(&triple) {
                return Ok(bt);
            }
            return Err(BunTargetError::UnknownTriple(triple));
        }

        let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
        let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
        let envv = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
        let vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_else(|_| "unknown".into());

        let triple = format!(
            "{}-{}-{}-{}",
            arch,
            vendor,
            os,
            if envv.is_empty() { "gnu" } else { &envv }
        );
        if let Some(bt) = Self::from_rust_target(&triple) {
            Ok(bt)
        } else {
            Err(BunTargetError::UnknownTriple(triple))
        }
    }
}

#[derive(Debug)]
pub enum BunTargetError {
    UnknownTriple(String),
}

impl fmt::Display for BunTargetError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            BunTargetError::UnknownTriple(t) => write!(f, "unrecognized Rust target triple: {t}"),
        }
    }
}

impl std::error::Error for BunTargetError {}
crates/cli/package/cli.ts (Executable file, 339 lines)
@@ -0,0 +1,339 @@
#!/usr/bin/env bun

import OpenAI from "openai";
import { parseArgs } from "util";

// =====================
// Config
// =====================
const DEFAULT_MODEL = "gemma-3-1b-it";
const DEFAULT_MAX_TOKENS = 256;

// Toggle this to reduce log overhead during timing runs
const PRINT_CHUNK_DEBUG = false;

// How many rows to show in the timing tables
const SHOW_FIRST_N = 3;
const SHOW_SLOWEST_N = 3;

// =====================
// Helpers
// =====================
const now = () => performance.now();

type ChunkStat = {
  index: number;
  tSinceRequestStartMs: number;
  dtSincePrevMs: number;
  contentChars: number;
};

function printHelp() {
  console.log(`
./cli [options] [prompt]

Simple CLI tool for testing the local OpenAI-compatible API server.

Options:
  --model <model>     Model to use (default: gemma-3-1b-it)
  --prompt <prompt>   The prompt to send (can also be provided as positional argument)
  --list-models       List all available models from the server
  --help              Show this help message

Examples:
  ./cli "What is the capital of France?"
  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
  ./cli --prompt "Who was the 16th president of the United States?"
  ./cli --list-models

The server must be running at http://localhost:8080
`);
}

const { values, positionals } = parseArgs({
  args: process.argv.slice(2),
  options: {
    model: { type: "string" },
    prompt: { type: "string" },
    help: { type: "boolean" },
    "list-models": { type: "boolean" },
  },
  strict: false,
  allowPositionals: true,
});

async function requestLocalOpenAI(model: string, userPrompt: string) {
  const openai = new OpenAI({
    baseURL: "http://localhost:8080/v1",
    apiKey: "not used",
  });
  try {
    console.log("[DEBUG] Creating chat completion request...");
    return openai.chat.completions.create({
      model,
      max_tokens: DEFAULT_MAX_TOKENS,
      stream: true,
      messages: [
        {
          role: "system",
          content: "You are a helpful assistant who responds thoughtfully and concisely.",
        },
        { role: "user", content: userPrompt },
      ],
    });
  } catch (e: any) {
    console.error("[ERROR] Failed to connect to local OpenAI server:", e.message);
    console.error("[HINT] Make sure the server is running at http://localhost:8080");
    console.error("[HINT] Start it with: ./run_server.sh");
    throw e;
  }
}

async function listModels() {
  const openai = new OpenAI({
    baseURL: "http://localhost:8080/v1",
    apiKey: "not used",
  });
  try {
    const models = await openai.models.list();
    console.log(`[INFO] Available models from http://localhost:8080/v1:`);
    console.log("---");

    if (models.data && models.data.length > 0) {
      models.data.forEach((model, index) => {
        console.log(`${index + 1}. ${model.id}`);
        console.log(`   Owner: ${model.owned_by}`);
        console.log(`   Created: ${new Date(model.created * 1000).toISOString()}`);
        console.log("");
      });
      console.log(`Total: ${models.data.length} models available`);
    } else {
      console.log("No models found.");
    }
  } catch (e: any) {
    console.error("[ERROR] Failed to fetch models from local OpenAI server:", e.message);
    console.error("[HINT] Make sure the server is running at http://localhost:8080");
    console.error("[HINT] Start it with: ./run_server.sh");
    throw e;
  }
}

// =====================
// Timing math
// =====================
function median(nums: number[]) {
  if (nums.length === 0) return 0;
  const s = [...nums].sort((a, b) => a - b);
  const mid = Math.floor(s.length / 2);
  return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
}

function quantile(nums: number[], q: number) {
  if (nums.length === 0) return 0;
  const s = [...nums].sort((a, b) => a - b);
  const pos = (s.length - 1) * q;
  const base = Math.floor(pos);
  const rest = pos - base;
  return s[base + 1] !== undefined ? s[base] + rest * (s[base + 1] - s[base]) : s[base];
}

function ms(n: number) {
  return `${n.toFixed(1)} ms`;
}

// =====================
// Main
// =====================
async function main() {
  const tProgramStart = now();

  if (values.help) {
    printHelp();
    process.exit(0);
  }

  if (values["list-models"]) {
    try {
      await listModels();
      process.exit(0);
    } catch (error: any) {
      console.error("\n[ERROR] Failed to list models:", error.message);
      process.exit(1);
    }
  }

  const prompt = values.prompt ?? positionals[0];

  if (!prompt) {
    console.error("[ERROR] No prompt provided!");
    printHelp();
    process.exit(1);
  }

  const model = values.model || DEFAULT_MODEL;

  console.log(`[INFO] Using model: ${model}`);
  console.log(`[INFO] Prompt: ${prompt}`);
  console.log(`[INFO] Connecting to: http://localhost:8080/v1`);
  console.log("---");

  const tBeforeRequest = now();

  try {
    console.log("[DEBUG] Initiating request to OpenAI server...");
    const response = await requestLocalOpenAI(model, prompt);
    const tAfterCreate = now();

    // Streaming handling + timing
    let fullResponse = "";
    let chunkCount = 0;

    const chunkStats: ChunkStat[] = [];
    let tFirstChunk: number | null = null;
    let tPrevChunk: number | null = null;

    console.log("[INFO] Waiting for model to generate response...");
    let loadingInterval;
    if (!PRINT_CHUNK_DEBUG) {
      // Show loading animation only if not in debug mode
      const loadingChars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
      let i = 0;
      process.stdout.write('\r[INFO] Thinking ');
      loadingInterval = setInterval(() => {
        process.stdout.write(`\r[INFO] Thinking ${loadingChars[i++ % loadingChars.length]} `);
      }, 80);
    } else {
      console.log("[DEBUG] Starting to receive streaming response...");
    }

    for await (const chunk of response) {
      // Clear loading animation on first chunk
      if (loadingInterval) {
        clearInterval(loadingInterval);
        loadingInterval = undefined; // clearInterval doesn't falsify the handle; without this, every chunk re-blanks the output line
        process.stdout.write('\r \r');
      }
      const tNow = now();
      chunkCount++;

      // Extract content (delta) if present
      const content = chunk.choices?.[0]?.delta?.content ?? "";
      if (PRINT_CHUNK_DEBUG) {
        console.log(`[DEBUG] Received chunk #${chunkCount}:`, JSON.stringify(chunk));
        if (content) console.log(`[DEBUG] Chunk content: "${content}"`);
      }

      if (content) {
        process.stdout.write(content);
        fullResponse += content;
      }

      if (tFirstChunk === null) tFirstChunk = tNow;

      const dtSincePrev = tPrevChunk === null ? 0 : tNow - tPrevChunk;
      chunkStats.push({
        index: chunkCount,
        tSinceRequestStartMs: tNow - tBeforeRequest,
        dtSincePrevMs: dtSincePrev,
        contentChars: content.length,
      });

      tPrevChunk = tNow;
    }

    // =========
    // Summary
    // =========
    const tStreamEnd = now();
    const totalChars = fullResponse.length;

    console.log("\n---");
    console.log(`[DEBUG] Stream completed after ${chunkCount} chunks`);
    console.log(`[INFO] Response completed. Total length: ${totalChars} characters`);

    // Build timing metrics
    const ttfbMs = (tFirstChunk ?? tStreamEnd) - tAfterCreate; // time from create() resolving → first chunk
    const createOverheadMs = tAfterCreate - tBeforeRequest; // time spent awaiting the create() promise
    const totalSinceRequestMs = tStreamEnd - tBeforeRequest; // from just before create() to last chunk
    const streamDurationMs = tFirstChunk === null ? 0 : tStreamEnd - tFirstChunk;

    const gaps = chunkStats
      .map((c) => c.dtSincePrevMs)
      // ignore the first "gap", which is 0 by construction
      .slice(1);

    const avgGapMs = gaps.length ? gaps.reduce((a, b) => a + b, 0) / gaps.length : 0;
    const medGapMs = median(gaps);
    const p95GapMs = quantile(gaps, 0.95);

    let maxGapMs = 0;
    let maxGapAtChunk = 0;
    for (let i = 0; i < gaps.length; i++) {
      if (gaps[i] > maxGapMs) {
        maxGapMs = gaps[i];
        maxGapAtChunk = i + 2; // +1 to move from 0-based, +1 because we sliced off the first chunk
      }
    }

    // Pretty-print summary
    console.log("\n=== Timing Summary ===");
    console.log(`create() await time: ${ms(createOverheadMs)}`);
    console.log(`TTFB (to 1st chunk): ${ms(ttfbMs)}`);
    console.log(`Stream duration: ${ms(streamDurationMs)}`);
    console.log(`End-to-end (req→last): ${ms(totalSinceRequestMs)}`);
    console.log(`Chunks: ${chunkCount}`);
    console.log(`Total content chars: ${totalChars}`);
    console.log(`Avg chars/chunk: ${(chunkCount ? totalChars / chunkCount : 0).toFixed(1)}`);
    console.log(`Inter-chunk gap (avg): ${ms(avgGapMs)}`);
    console.log(`Inter-chunk gap (median): ${ms(medGapMs)}`);
    console.log(`Inter-chunk gap (p95): ${ms(p95GapMs)}`);
    if (gaps.length > 0) {
      console.log(`Largest gap: ${ms(maxGapMs)} (before chunk #${maxGapAtChunk})`);
    }

    // Small tables: first N and slowest N gaps
    const firstRows = chunkStats.slice(0, SHOW_FIRST_N).map((c) => ({
      chunk: c.index,
      "t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
      "dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
      "chars": c.contentChars,
    }));

    const slowestRows = chunkStats
      .slice(1) // skip first (no meaningful gap)
      .sort((a, b) => b.dtSincePrevMs - a.dtSincePrevMs)
      .slice(0, SHOW_SLOWEST_N)
      .map((c) => ({
        chunk: c.index,
        "t since request": `${c.tSinceRequestStartMs.toFixed(1)} ms`,
        "dt since prev": `${c.dtSincePrevMs.toFixed(1)} ms`,
        "chars": c.contentChars,
      }));

    if (firstRows.length > 0) {
      console.log("\n--- First chunk timings ---");
      // @ts-ignore Bun/Node support console.table
      console.table(firstRows);
    }

    if (slowestRows.length > 0) {
      console.log(`\n--- Slowest ${SHOW_SLOWEST_N} gaps ---`);
      // @ts-ignore
      console.table(slowestRows);
    }

    const tProgramEnd = now();
    console.log("\n=== Program Overhead ===");
    console.log(`Total program runtime: ${ms(tProgramEnd - tProgramStart)}`);
  } catch (error: any) {
    console.error("\n[ERROR] Request failed:", error.message);
    process.exit(1);
  }
}

// Run the main function
main().catch((error) => {
  console.error("[FATAL ERROR]:", error);
  process.exit(1);
});
crates/cli/package/package.json (Normal file, 11 lines)
@@ -0,0 +1,11 @@
{
  "name": "cli",
  "main": "cli.ts",
  "scripts": {
    "build": "bun build cli.ts --compile --outfile cli"
  },
  "dependencies": {
    "install": "^0.13.0",
    "openai": "^5.16.0"
  }
}
crates/cli/src/main.rs (Normal file, 32 lines)
@@ -0,0 +1,32 @@
use std::{env, fs, io, path::PathBuf, process::Command};

#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;

fn main() -> io::Result<()> {
    // Absolute path provided by build.rs at compile time.
    // `include_bytes!` accepts string literals; `env!` expands to a literal at compile time.
    const CLIENT_CLI: &[u8] = include_bytes!(env!("CLIENT_CLI_BIN"));

    // Write to a temp file
    let mut tmp = env::temp_dir();
    tmp.push("client-cli-embedded");

    fs::write(&tmp, CLIENT_CLI)?;

    // Ensure it's executable on Unix
    #[cfg(unix)]
    {
        let mut perms = fs::metadata(&tmp)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&tmp, perms)?;
    }

    // Run it
    let status = Command::new(&tmp).arg("--version").status()?;
    if !status.success() {
        return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
    }

    Ok(())
}
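As committed, the wrapper only invokes the embedded binary with a hard-coded `--version`, which reads like a staging placeholder for the release. A hypothetical sketch of the natural next step, forwarding the wrapper's own arguments and exit code to the embedded CLI (same `CLIENT_CLI_BIN` embed as above; not part of this commit):

```rust
// Hypothetical variant of src/main.rs that forwards args and exit status.
use std::{env, fs, io, process::Command};

#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;

fn main() -> io::Result<()> {
    const CLIENT_CLI: &[u8] = include_bytes!(env!("CLIENT_CLI_BIN"));

    // Materialize the embedded binary in the temp dir, as above.
    let mut tmp = env::temp_dir();
    tmp.push("client-cli-embedded");
    fs::write(&tmp, CLIENT_CLI)?;

    #[cfg(unix)]
    {
        let mut perms = fs::metadata(&tmp)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&tmp, perms)?;
    }

    // Forward everything after argv[0] and mirror the child's exit code.
    let status = Command::new(&tmp).args(env::args().skip(1)).status()?;
    std::process::exit(status.code().unwrap_or(1));
}
```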