fix mlx omni provider

This commit is contained in:
geoffsee
2025-06-18 14:33:07 -04:00
parent f1d7f52dbd
commit b7f02eb4fb
9 changed files with 78 additions and 43 deletions

View File

@@ -85,6 +85,7 @@
"devDependencies": { "devDependencies": {
"@anthropic-ai/sdk": "^0.32.1", "@anthropic-ai/sdk": "^0.32.1",
"@cloudflare/workers-types": "^4.20241205.0", "@cloudflare/workers-types": "^4.20241205.0",
"@open-gsio/client": "workspace:*",
"@open-gsio/env": "workspace:*", "@open-gsio/env": "workspace:*",
"@testing-library/jest-dom": "^6.4.2", "@testing-library/jest-dom": "^6.4.2",
"@testing-library/user-event": "^14.5.2", "@testing-library/user-event": "^14.5.2",

View File

@@ -0,0 +1,3 @@
import { renderPage } from "vike/server";
export default renderPage;

View File

@@ -1,5 +1,5 @@
import { Router, withParams } from "itty-router"; import { Router, withParams } from "itty-router";
import { createRequestContext } from "./RequestContext.ts"; import { createRequestContext } from "./RequestContext";
export function createRouter() { export function createRouter() {
return ( return (
@@ -57,13 +57,18 @@ export function createRouter() {
// return documentService.handleGetDocument(r) // return documentService.handleGetDocument(r)
// }) // })
.all("/api/metrics*", async (r, e, c) => { .get("/api/metrics*", async (r, e, c) => {
const { metricsService } = createRequestContext(e, c);
return metricsService.handleMetricsRequest(r);
})
.post("/api/metrics*", async (r, e, c) => {
const { metricsService } = createRequestContext(e, c); const { metricsService } = createRequestContext(e, c);
return metricsService.handleMetricsRequest(r); return metricsService.handleMetricsRequest(r);
}) })
// renders the app // renders the app
.get("^(?!/api/).*$", async (r, e, c) => { .all("^(?!/api/)(?!/assets/).*$", async (r, e, c) => {
const { assetService } = createRequestContext(e, c); const { assetService } = createRequestContext(e, c);

View File

@@ -10,6 +10,7 @@
}, },
"devDependencies": { "devDependencies": {
"@open-gsio/env": "workspace:*", "@open-gsio/env": "workspace:*",
"@open-gsio/client": "workspace:*",
"@anthropic-ai/sdk": "^0.32.1", "@anthropic-ai/sdk": "^0.32.1",
"bun-sqlite-key-value": "^1.13.1", "bun-sqlite-key-value": "^1.13.1",
"@cloudflare/workers-types": "^4.20241205.0", "@cloudflare/workers-types": "^4.20241205.0",

View File

@@ -1,7 +1,12 @@
export type GenericEnv = Record<string, any>;
export class ProviderRepository { export class ProviderRepository {
#providers: {name: string, key: string, endpoint: string}[] = []; #providers: {name: string, key: string, endpoint: string}[] = [];
constructor(env: Record<string, any>) { #env: Record<string, any>;
constructor(env: GenericEnv) {
this.#env = env
this.setProviders(env); this.setProviders(env);
} }
@@ -19,7 +24,8 @@ export class ProviderRepository {
mlx: "http://localhost:10240/v1", mlx: "http://localhost:10240/v1",
} }
static async getModelFamily(model, env: Env) { static async getModelFamily(model: any, env: Env) {
console.log(env);
const allModels = await env.KV_STORAGE.get("supportedModels"); const allModels = await env.KV_STORAGE.get("supportedModels");
const models = JSON.parse(allModels); const models = JSON.parse(allModels);
const modelData = models.filter(m => m.id === model) const modelData = models.filter(m => m.id === model)

View File

@@ -1,39 +1,71 @@
import { OpenAI } from "openai"; import { OpenAI } from "openai";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts"; import { Utils } from "../lib/utils";
import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class MlxOmniChatProvider extends BaseChatProvider { export class MlxOmniChatProvider extends BaseChatProvider {
getOpenAIClient(param: CommonProviderParams): OpenAI { getOpenAIClient(param: CommonProviderParams): OpenAI {
return new OpenAI({ return new OpenAI({
baseURL: param.env.MLX_API_ENDPOINT ?? "http://localhost:10240", baseURL: "http://localhost:10240",
apiKey: param.env.MLX_API_KEY, apiKey: param.env.MLX_API_KEY,
}); });
} }
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any { getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
const tuningParams = { const baseTuningParams = {
temperature: 0.75, temperature: 0.86,
top_p: 0.98,
presence_penalty: 0.1,
frequency_penalty: 0.3,
max_tokens: param.maxTokens as number,
}; };
const getTuningParams = () => { const getTuningParams = () => {
return tuningParams; return baseTuningParams;
}; };
return { let completionRequest: ChatCompletionCreateParamsStreaming = {
model: param.model, model: param.model,
messages: safeMessages,
stream: true, stream: true,
...getTuningParams(), messages: safeMessages
}; };
const client = this.getOpenAIClient(param);
const isLocal = client.baseURL.includes("localhost");
if(isLocal) {
completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
completionRequest["stream_options"] = {
include_usage: true
};
} else {
completionRequest = {...completionRequest, ...getTuningParams()};
}
return completionRequest;
} }
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> { async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") { const isLocal = chunk.usage !== undefined;
dataCallback({ type: "chat", data: chunk });
return true; if (isLocal && chunk.usage) {
dataCallback({
type: "chat",
data: {
choices: [
{
delta: { content: "" },
logprobs: null,
finish_reason: "stop",
},
],
},
});
return true; // Break the stream
} }
dataCallback({ type: "chat", data: chunk }); dataCallback({ type: "chat", data: chunk });
return false; return false; // Continue the stream
} }
} }
@@ -41,16 +73,7 @@ export class MlxOmniChatSdk {
private static provider = new MlxOmniChatProvider(); private static provider = new MlxOmniChatProvider();
static async handleMlxOmniStream( static async handleMlxOmniStream(
ctx: { ctx: any,
openai: OpenAI;
systemPrompt: any;
preprocessedContext: any;
maxTokens: unknown | number | undefined;
messages: any;
disableWebhookGeneration: boolean;
model: any;
env: Env;
},
dataCallback: (data: any) => any, dataCallback: (data: any) => any,
) { ) {
if (!ctx.messages?.length) { if (!ctx.messages?.length) {
@@ -62,10 +85,9 @@ export class MlxOmniChatSdk {
systemPrompt: ctx.systemPrompt, systemPrompt: ctx.systemPrompt,
preprocessedContext: ctx.preprocessedContext, preprocessedContext: ctx.preprocessedContext,
maxTokens: ctx.maxTokens, maxTokens: ctx.maxTokens,
messages: ctx.messages, messages: Utils.normalizeWithBlanks(ctx.messages),
model: ctx.model, model: ctx.model,
env: ctx.env, env: ctx.env
disableWebhookGeneration: ctx.disableWebhookGeneration,
}, },
dataCallback, dataCallback,
); );

View File

@@ -1,5 +1,5 @@
import { types } from "mobx-state-tree"; import { types } from "mobx-state-tree";
import { renderPage } from "vike/server";
export default types export default types
.model("StaticAssetStore", {}) .model("StaticAssetStore", {})
@@ -17,7 +17,7 @@ export default types
async handleSsr( async handleSsr(
url: string, url: string,
headers: Headers, headers: Headers,
env: Vike.PageContext["env"], env: Vike.PageContext.env,
) { ) {
console.log("handleSsr"); console.log("handleSsr");
const pageContextInit = { const pageContextInit = {

View File

@@ -13,7 +13,7 @@ import {XaiChatSdk} from "../providers/xai";
import {CerebrasSdk} from "../providers/cerebras"; import {CerebrasSdk} from "../providers/cerebras";
import {CloudflareAISdk} from "../providers/cloudflareAi"; import {CloudflareAISdk} from "../providers/cloudflareAi";
import {OllamaChatSdk} from "../providers/ollama"; import {OllamaChatSdk} from "../providers/ollama";
import {MlxOmniChatSdk} from "../providers/mlx-omni"; import {MlxOmniChatProvider, MlxOmniChatSdk} from "../providers/mlx-omni";
import {ProviderRepository} from "../providers/_ProviderRepository"; import {ProviderRepository} from "../providers/_ProviderRepository";
export interface StreamParams { export interface StreamParams {
@@ -126,7 +126,7 @@ const ChatService = types
// ----- Helpers ---------------------------------------------------------- // ----- Helpers ----------------------------------------------------------
const logger = console; const logger = console;
const useCache = false; const useCache = true;
if(useCache) { if(useCache) {
// ----- 1. Try cached value --------------------------------------------- // ----- 1. Try cached value ---------------------------------------------
@@ -139,9 +139,10 @@ const ChatService = types
return new Response(JSON.stringify(parsed), { status: 200 }); return new Response(JSON.stringify(parsed), { status: 200 });
} }
logger.warn('Cache entry malformed refreshing'); logger.warn('Cache entry malformed refreshing');
throw new Error('Malformed cache entry');
} }
} catch (err) { } catch (err) {
logger.error('Error reading/parsing supportedModels cache', err); logger.warn('Error reading/parsing supportedModels cache', err);
} }
} }
@@ -260,11 +261,8 @@ const ChatService = types
}) { }) {
const {streamConfig, streamParams, controller, encoder, streamId} = params; const {streamConfig, streamParams, controller, encoder, streamId} = params;
const useModelFamily = () => {
return ProviderRepository.getModelFamily(streamConfig.model, self.env)
}
const modelFamily = await useModelFamily(); const modelFamily = await ProviderRepository.getModelFamily(streamConfig.model, self.env);
const useModelHandler = () => { const useModelHandler = () => {
return modelHandlers[modelFamily] return modelHandlers[modelFamily]

View File

@@ -11,6 +11,5 @@
"moduleResolution": "bundler", "moduleResolution": "bundler",
"skipLibCheck": true, "skipLibCheck": true,
"jsx": "react-jsx" "jsx": "react-jsx"
}, }
"exclude": ["*.test.ts"]
} }