fix mlx omni provider

geoffsee
2025-06-18 14:33:07 -04:00
parent f1d7f52dbd
commit b7f02eb4fb
9 changed files with 78 additions and 43 deletions


@@ -1,7 +1,12 @@
+export type GenericEnv = Record<string, any>;
+
 export class ProviderRepository {
   #providers: {name: string, key: string, endpoint: string}[] = [];
-  constructor(env: Record<string, any>) {
+  #env: Record<string, any>;
+
+  constructor(env: GenericEnv) {
+    this.#env = env
     this.setProviders(env);
   }
@@ -19,7 +24,8 @@ export class ProviderRepository {
     mlx: "http://localhost:10240/v1",
   }
-  static async getModelFamily(model, env: Env) {
+  static async getModelFamily(model: any, env: Env) {
+    console.log(env);
     const allModels = await env.KV_STORAGE.get("supportedModels");
     const models = JSON.parse(allModels);
     const modelData = models.filter(m => m.id === model)
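Note on getModelFamily: the method reads the supportedModels JSON blob out of KV and filters it by id. A rough sketch of how the lookup presumably completes, assuming each entry carries an id plus a provider field (the provider field and the return shape are assumptions, not shown in this diff):

  // Hypothetical helper mirroring getModelFamily; only the KV read and the
  // id filter are confirmed by the diff above. The `provider` field on each
  // supportedModels entry is an assumption for illustration.
  async function lookupModelFamily(
    model: string,
    env: { KV_STORAGE: { get(key: string): Promise<string | null> } },
  ): Promise<string | undefined> {
    const raw = await env.KV_STORAGE.get("supportedModels");
    const models: Array<{ id: string; provider?: string }> = JSON.parse(raw ?? "[]");
    return models.find((m) => m.id === model)?.provider; // e.g. "mlx", "openai"
  }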


@@ -1,39 +1,71 @@
 import { OpenAI } from "openai";
-import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
+import { Utils } from "../lib/utils";
+import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
+import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
 
 export class MlxOmniChatProvider extends BaseChatProvider {
   getOpenAIClient(param: CommonProviderParams): OpenAI {
     return new OpenAI({
-      baseURL: param.env.MLX_API_ENDPOINT ?? "http://localhost:10240",
+      baseURL: "http://localhost:10240",
       apiKey: param.env.MLX_API_KEY,
     });
   }
 
-  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
-    const tuningParams = {
-      temperature: 0.75,
+  getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
+    const baseTuningParams = {
+      temperature: 0.86,
       top_p: 0.98,
       presence_penalty: 0.1,
       frequency_penalty: 0.3,
       max_tokens: param.maxTokens as number,
     };
 
     const getTuningParams = () => {
-      return tuningParams;
+      return baseTuningParams;
     };
 
-    return {
+    let completionRequest: ChatCompletionCreateParamsStreaming = {
       model: param.model,
-      messages: safeMessages,
       stream: true,
-      ...getTuningParams(),
+      messages: safeMessages
     };
 
+    const client = this.getOpenAIClient(param);
+    const isLocal = client.baseURL.includes("localhost");
+
+    if(isLocal) {
+      completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
+      completionRequest["stream_options"] = {
+        include_usage: true
+      };
+    } else {
+      completionRequest = {...completionRequest, ...getTuningParams()};
+    }
+
+    return completionRequest;
   }
 
   async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
-    if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
-      dataCallback({ type: "chat", data: chunk });
-      return true;
+    const isLocal = chunk.usage !== undefined;
+
+    if (isLocal && chunk.usage) {
+      dataCallback({
+        type: "chat",
+        data: {
+          choices: [
+            {
+              delta: { content: "" },
+              logprobs: null,
+              finish_reason: "stop",
+            },
+          ],
+        },
+      });
+      return true; // Break the stream
     }
 
     dataCallback({ type: "chat", data: chunk });
-    return false;
+    return false; // Continue the stream
   }
 }
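Note on normalizeWithBlanks: local MLX endpoints tend to be stricter about role alternation than hosted OpenAI-compatible APIs, which is presumably why messages are passed through Utils.normalizeWithBlanks when the base URL points at localhost. A minimal sketch of what such a helper could look like, assuming it inserts empty turns so roles strictly alternate (the real implementation lives in ../lib/utils and is not shown in this diff):

  // Hypothetical sketch of Utils.normalizeWithBlanks; the actual helper may
  // differ. Assumed behavior: insert blank turns of the opposite role so that
  // user/assistant messages strictly alternate.
  type ChatMessage = { role: string; content: string };

  function normalizeWithBlanks(messages: ChatMessage[]): ChatMessage[] {
    const out: ChatMessage[] = [];
    for (const msg of messages) {
      const prev = out[out.length - 1];
      if (prev && prev.role === msg.role) {
        // Two consecutive messages with the same role: pad with a blank turn.
        out.push({ role: msg.role === "user" ? "assistant" : "user", content: "" });
      }
      out.push(msg);
    }
    return out;
  }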
@@ -41,16 +73,7 @@ export class MlxOmniChatSdk {
   private static provider = new MlxOmniChatProvider();
 
   static async handleMlxOmniStream(
-    ctx: {
-      openai: OpenAI;
-      systemPrompt: any;
-      preprocessedContext: any;
-      maxTokens: unknown | number | undefined;
-      messages: any;
-      disableWebhookGeneration: boolean;
-      model: any;
-      env: Env;
-    },
+    ctx: any,
     dataCallback: (data: any) => any,
   ) {
     if (!ctx.messages?.length) {
@@ -62,10 +85,9 @@
         systemPrompt: ctx.systemPrompt,
         preprocessedContext: ctx.preprocessedContext,
         maxTokens: ctx.maxTokens,
-        messages: ctx.messages,
+        messages: Utils.normalizeWithBlanks(ctx.messages),
         model: ctx.model,
-        env: ctx.env,
-        disableWebhookGeneration: ctx.disableWebhookGeneration,
+        env: ctx.env
       },
       dataCallback,
     );
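For reference, an illustrative caller for handleMlxOmniStream, assuming a ctx object shaped like the fields the SDK forwards above and a dataCallback that writes SSE-style lines (the model id and the write target are assumptions, not part of this commit):

  // Illustrative usage sketch; ctx fields mirror what handleMlxOmniStream
  // forwards to the provider. The model id and SSE framing are hypothetical.
  async function streamChat(env: Env, write: (line: string) => void) {
    await MlxOmniChatSdk.handleMlxOmniStream(
      {
        systemPrompt: "You are a helpful assistant.",
        preprocessedContext: {},
        maxTokens: 1024,
        messages: [{ role: "user", content: "Hello" }],
        model: "mlx-community/Llama-3.1-8B-Instruct-4bit", // hypothetical model id
        env,
      },
      (data) => write(`data: ${JSON.stringify(data.data)}\n\n`),
    );
  }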