#!/usr/bin/env bun
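// Small CLI client for smoke-testing the local OpenAI-compatible API server.
// Streams a chat completion for a prompt, or lists the models the server exposes.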
import OpenAI from "openai";
import { parseArgs } from "util";

const DEFAULT_MODEL = "gemma-3-1b-it";
const DEFAULT_MAX_TOKENS = 100;

function printHelp() {
  console.log(`
Usage: bun client_cli.ts [options] [prompt]

Simple CLI tool for testing the local OpenAI-compatible API server.

Options:
  --model <model>     Model to use (default: ${DEFAULT_MODEL})
  --prompt <prompt>   The prompt to send (can also be provided as a positional argument)
  --list-models       List all available models from the server
  --help              Show this help message

Examples:
  ./cli.ts "What is the capital of France?"
  ./cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
  ./cli.ts --prompt "Who was the 16th president of the United States?"
  ./cli.ts --list-models

The server should be running at http://localhost:8080
Start it with: ./run_server.sh
`);
}
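// Parse command-line flags with util.parseArgs (built into Bun).
// strict: false tolerates unrecognized flags, and allowPositionals: true
// lets a bare prompt be passed without the --prompt flag.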
const { values, positionals } = parseArgs({
  args: Bun.argv,
  options: {
    model: {
      type: 'string',
    },
    prompt: {
      type: 'string',
    },
    help: {
      type: 'boolean',
    },
    'list-models': {
      type: 'boolean',
    },
  },
  strict: false,
  allowPositionals: true,
});
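// Send a single chat completion request to the local server and return the
// streaming response for the caller to iterate over.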
async function requestLocalOpenAI(model: string, userPrompt: string) {
  const openai = new OpenAI({
    baseURL: "http://localhost:8080/v1",
    apiKey: "not used", // the local server does not validate the key
  });
  try {
    // Await inside the try block so connection errors are caught here;
    // returning the bare promise would let a rejection bypass this catch.
    return await openai.chat.completions.create({
      model: model,
      max_tokens: DEFAULT_MAX_TOKENS,
      stream: true,
      messages: [
        { name: "assistant_1", role: "system", content: "I am a helpful assistant" },
        { name: "user_1", role: "user", content: userPrompt },
      ],
    });
  } catch (e: any) {
    console.error("[ERROR] Failed to connect to local OpenAI server:", e.message);
    console.error("[HINT] Make sure the server is running at http://localhost:8080");
    console.error("[HINT] Start it with: ./run_server.sh");
    throw e;
  }
}
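// Query the server's model list (the OpenAI SDK's models.list(), i.e. GET
// {baseURL}/models) and print a numbered summary of each entry.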
async function listModels() {
  const openai = new OpenAI({
    baseURL: "http://localhost:8080/v1",
    apiKey: "not used",
  });
  try {
    const models = await openai.models.list();
    console.log(`[INFO] Available models from http://localhost:8080/v1:`);
    console.log("---");

    if (models.data && models.data.length > 0) {
      models.data.forEach((model, index) => {
        console.log(`${index + 1}. ${model.id}`);
        console.log(`   Owner: ${model.owned_by}`);
        console.log(`   Created: ${new Date(model.created * 1000).toISOString()}`);
        console.log("");
      });
      console.log(`Total: ${models.data.length} models available`);
    } else {
      console.log("No models found.");
    }
  } catch (e: any) {
    console.error("[ERROR] Failed to fetch models from local OpenAI server:", e.message);
    console.error("[HINT] Make sure the server is running at http://localhost:8080");
    console.error("[HINT] Start it with: ./run_server.sh");
    throw e;
  }
}
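// Entry point: handle --help and --list-models first, then resolve the
// prompt and model and stream the completion to stdout.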
async function main() {
  // Show help if requested
  if (values.help) {
    printHelp();
    process.exit(0);
  }

  // List models if requested
  if (values['list-models']) {
    try {
      await listModels();
      process.exit(0);
    } catch (error: any) {
      console.error("\n[ERROR] Failed to list models:", error.message);
      process.exit(1);
    }
  }

  // Get the prompt from either the --prompt flag or the first positional
  // argument (positionals[0] is the bun executable, positionals[1] is the
  // script path, so user input starts at positionals[2]).
  const prompt = values.prompt || positionals[2];

  if (!prompt) {
    console.error("[ERROR] No prompt provided!");
    printHelp();
    process.exit(1);
  }

  // Get the model (use default if not provided)
  const model = values.model || DEFAULT_MODEL;

  console.log(`[INFO] Using model: ${model}`);
  console.log(`[INFO] Prompt: ${prompt}`);
  console.log(`[INFO] Connecting to: http://localhost:8080/v1`);
  console.log("---");

  try {
    const response = await requestLocalOpenAI(model, prompt);

    // Handle the streaming response: echo each delta to stdout as it
    // arrives and accumulate the full text to report its length.
    let fullResponse = "";
    for await (const chunk of response) {
      const content = chunk.choices[0]?.delta?.content;
      if (content) {
        process.stdout.write(content);
        fullResponse += content;
      }
    }

    console.log("\n---");
    console.log(`[INFO] Response completed. Total length: ${fullResponse.length} characters`);
  } catch (error: any) {
    console.error("\n[ERROR] Request failed:", error.message);
    process.exit(1);
  }
}
// Run the main function
main().catch(error => {
  console.error("[FATAL ERROR]:", error);
  process.exit(1);
});