Mirror of https://github.com/geoffsee/predict-otron-9001.git, synced 2025-09-08 22:46:44 +00:00.
Refactor apply_cached_repeat_penalty for optimized caching and reuse (sketched below), add extensive unit tests, and integrate special handling for Gemma-specific models.

- Remove test_request.sh, deprecated functionality, and unused imports
- Introduce a new CLI tool (cli.ts) for testing the inference engine, and adjust the handling of non-streaming and streaming chat completions
- Add CPU fallback support for text generation when the primary device is unsupported
- Introduce an execute_with_fallback method to handle device-compatibility and shape-mismatch errors (see the second sketch below)
- Extend unit tests to reproduce the tensor shape-mismatch errors specific to certain model configurations
- Increase HTTP timeout limits in the curl_chat_stream.sh script for reliable API testing
- Get the chat completion endpoint working with gemma3 (non-streaming)
- Add a benchmarking guide with HTML reporting, a Leptos chat crate, and middleware for metrics tracking
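The caching refactor named in the title lives in the repository's Rust engine, which this diff does not show; to make the idea concrete, here is a minimal sketch in TypeScript (the language of the tests below). Everything in it is hypothetical: the function name mirrors apply_cached_repeat_penalty, but the signature and the choice of a persistent `seen` set as the cache are assumptions, not the crate's actual API.

// Hypothetical sketch: apply a repeat penalty using a cache of token ids
// penalized on earlier steps, so the full generation history is not
// rescanned on every decoding step.
function applyCachedRepeatPenalty(
    logits: Float32Array,  // fresh logits for the current decoding step
    newTokens: number[],   // tokens emitted since the previous call
    penalty: number,       // > 1.0 discourages repetition
    seen: Set<number>,     // cache, persisted across steps by the caller
): Float32Array {
    for (const id of newTokens) seen.add(id); // incremental cache update
    for (const id of seen) {
        const l = logits[id];
        // Conventional repeat penalty: damp logits of already-seen tokens.
        logits[id] = l >= 0 ? l / penalty : l * penalty;
    }
    return logits;
}

Under these assumptions, the "reuse" in the title amounts to the caller keeping one `seen` set alive for the whole generation instead of rebuilding it per token.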
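The CPU-fallback bullet can be illustrated the same way. A hedged sketch of the retry pattern, again in TypeScript with invented names; the real execute_with_fallback is Rust, and how it classifies errors is not shown in this diff:

type Device = "gpu" | "cpu";

// Run `op` on the primary device; retry on CPU only for the two error
// classes the commit message mentions: unsupported-device failures and
// tensor shape mismatches. Anything else is re-thrown unchanged.
async function executeWithFallback<T>(op: (device: Device) => Promise<T>): Promise<T> {
    try {
        return await op("gpu");
    } catch (err) {
        const msg = String(err);
        if (msg.includes("unsupported") || msg.includes("shape mismatch")) {
            return await op("cpu");
        }
        throw err;
    }
}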
integration/bun.lock (new file, 14 lines)
@@ -0,0 +1,14 @@
+{
+  "lockfileVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "@predict-otron-9000/ingeration",
+      "dependencies": {
+        "openai": "^5.16.0",
+      },
+    },
+  },
+  "packages": {
+    "openai": ["openai@5.16.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-hoEH8ZNvg1HXjU9mp88L/ZH8O082Z8r6FHCXGiWAzVRrEv443aI57qhch4snu07yQydj+AUAWLenAiBXhu89Tw=="],
+  }
+}
integration/openai-client-tests/actual_openai.test.ts (new executable file, 32 lines)
@@ -0,0 +1,32 @@
+// #!/usr/bin/env bun
+//
+// import OpenAI from "openai";
+// import {describe, test, expect} from "bun:test";
+//
+// async function requestActualOpenAI(userPrompt: string) {
+//     const openai = new OpenAI();
+//     return await openai.chat.completions.create({
+//         model: "gpt-4o",
+//         max_tokens: 100,
+//         messages: [{name: "user_1", role: "user", content: userPrompt}]
+//     }).then(result => result.choices[0].message);
+// }
+//
+// // Exists as a smoke test.
+// describe("Actual OpenAI Completions", () => {
+//     test("Should return a valid message", async () => {
+//         const userPrompt = "Who was the 16th president of the United States?";
+//         const result = await requestActualOpenAI(userPrompt);
+//
+//         console.log({
+//             test: "hitting actual openai to ensure basic functionality",
+//             modelResponse: result.content,
+//             userPrompt
+//         });
+//
+//         expect(result.annotations).toEqual([])
+//         expect(result.content).toBeDefined();
+//         expect(result.refusal).toEqual(null);
+//         expect(result.role).toEqual("assistant");
+//     })
+// })
integration/openai-client-tests/local_openai.test.ts (new executable file, 43 lines)
@@ -0,0 +1,43 @@
+import OpenAI from "openai";
+import {describe, test, expect} from "bun:test";
+
+const supportedModels = ["gemma-3-1b-it"];
+
+
+async function requestLocalOpenAI(model: string, userPrompt: string) {
+    const openai = new OpenAI({
+        baseURL: "http://localhost:8080/v1",
+        apiKey: "not used",
+    });
+    try {
+        return openai.chat.completions.create({
+            model: model,
+            max_tokens: 100,
+            stream: true,
+            messages: [
+                {name: "assistant_1", role: "system", content: "I am a helpful assistant" },
+                {name: "user_1", role: "user", content: userPrompt}
+            ]
+        });
+    } catch (e) {
+        console.error(e);
+        throw e;
+    }
+}
+
+describe("Local OpenAI Completions", () => {
+    test("Should return a valid message", async () => {
+        const model = supportedModels.pop();
+        const userPrompt = "Who was the 16th president of the United States?";
+        const response = await requestLocalOpenAI(model, userPrompt);
+
+        const chunks = [];
+        for await (const chunk of response) {
+            console.log('Received chunk:', chunk);
+            chunks.push(chunk);
+        }
+
+        expect(chunks.length).toBeGreaterThan(0);
+    })
+})
+
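The test above exercises the streaming path; the commit message also notes that the chat completion endpoint works with gemma3 without streaming. For comparison, a hypothetical non-streaming request against the same local endpoint (not part of this diff; the model name and base URL are taken from the test above):

import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:8080/v1", apiKey: "not used" });

// Without stream: true, the full completion arrives as one response object.
const completion = await openai.chat.completions.create({
    model: "gemma-3-1b-it",
    max_tokens: 100,
    messages: [{ role: "user", content: "Who was the 16th president of the United States?" }],
});
console.log(completion.choices[0].message.content);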
integration/package.json (new file, 6 lines)
@@ -0,0 +1,6 @@
+{
+  "name": "@predict-otron-9000/ingeration",
+  "dependencies": {
+    "openai": "^5.16.0"
+  }
+}