fix mlx omni provider

geoffsee
2025-06-18 14:33:07 -04:00
parent f1d7f52dbd
commit b7f02eb4fb
9 changed files with 78 additions and 43 deletions


@@ -1,7 +1,12 @@
+export type GenericEnv = Record<string, any>;
+
 export class ProviderRepository {
   #providers: {name: string, key: string, endpoint: string}[] = [];
-  constructor(env: Record<string, any>) {
+  #env: Record<string, any>;
+
+  constructor(env: GenericEnv) {
+    this.#env = env
     this.setProviders(env);
   }
@@ -19,7 +24,8 @@ export class ProviderRepository {
     mlx: "http://localhost:10240/v1",
   }
-  static async getModelFamily(model, env: Env) {
+  static async getModelFamily(model: any, env: Env) {
+    console.log(env);
     const allModels = await env.KV_STORAGE.get("supportedModels");
     const models = JSON.parse(allModels);
     const modelData = models.filter(m => m.id === model)
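Note on getModelFamily: the method reads the supportedModels JSON blob out of KV and filters it by id. A rough sketch of how the lookup presumably completes, assuming each entry carries an id plus a provider field (the provider field and the return shape are assumptions, not shown in this diff):

  // Hypothetical helper mirroring getModelFamily; only the KV read and the
  // id filter are confirmed by the diff above. The `provider` field on each
  // supportedModels entry is an assumption for illustration.
  async function lookupModelFamily(
    model: string,
    env: { KV_STORAGE: { get(key: string): Promise<string | null> } },
  ): Promise<string | undefined> {
    const raw = await env.KV_STORAGE.get("supportedModels");
    const models: Array<{ id: string; provider?: string }> = JSON.parse(raw ?? "[]");
    return models.find((m) => m.id === model)?.provider; // e.g. "mlx", "openai"
  }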


@@ -1,39 +1,71 @@
 import { OpenAI } from "openai";
-import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
+import { Utils } from "../lib/utils";
+import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
+import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
 
 export class MlxOmniChatProvider extends BaseChatProvider {
   getOpenAIClient(param: CommonProviderParams): OpenAI {
     return new OpenAI({
-      baseURL: param.env.MLX_API_ENDPOINT ?? "http://localhost:10240",
+      baseURL: "http://localhost:10240",
       apiKey: param.env.MLX_API_KEY,
     });
   }
 
-  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
-    const tuningParams = {
-      temperature: 0.75,
+  getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
+    const baseTuningParams = {
+      temperature: 0.86,
       top_p: 0.98,
       presence_penalty: 0.1,
       frequency_penalty: 0.3,
       max_tokens: param.maxTokens as number,
     };
 
     const getTuningParams = () => {
-      return tuningParams;
+      return baseTuningParams;
     };
 
-    return {
+    let completionRequest: ChatCompletionCreateParamsStreaming = {
       model: param.model,
-      messages: safeMessages,
       stream: true,
-      ...getTuningParams(),
+      messages: safeMessages
     };
 
+    const client = this.getOpenAIClient(param);
+    const isLocal = client.baseURL.includes("localhost");
+
+    if(isLocal) {
+      completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
+      completionRequest["stream_options"] = {
+        include_usage: true
+      };
+    } else {
+      completionRequest = {...completionRequest, ...getTuningParams()};
+    }
+
+    return completionRequest;
   }
 
   async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
-    if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
-      dataCallback({ type: "chat", data: chunk });
-      return true;
+    const isLocal = chunk.usage !== undefined;
+
+    if (isLocal && chunk.usage) {
+      dataCallback({
+        type: "chat",
+        data: {
+          choices: [
+            {
+              delta: { content: "" },
+              logprobs: null,
+              finish_reason: "stop",
+            },
+          ],
+        },
+      });
+      return true; // Break the stream
     }
 
     dataCallback({ type: "chat", data: chunk });
-    return false;
+    return false; // Continue the stream
   }
 }
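Note on normalizeWithBlanks: local MLX endpoints tend to be stricter about role alternation than hosted OpenAI-compatible APIs, which is presumably why messages are passed through Utils.normalizeWithBlanks when the base URL points at localhost. A minimal sketch of what such a helper could look like, assuming it inserts empty turns so roles strictly alternate (the real implementation lives in ../lib/utils and is not shown in this diff):

  // Hypothetical sketch of Utils.normalizeWithBlanks; the actual helper may
  // differ. Assumed behavior: insert blank turns of the opposite role so that
  // user/assistant messages strictly alternate.
  type ChatMessage = { role: string; content: string };

  function normalizeWithBlanks(messages: ChatMessage[]): ChatMessage[] {
    const out: ChatMessage[] = [];
    for (const msg of messages) {
      const prev = out[out.length - 1];
      if (prev && prev.role === msg.role) {
        // Two consecutive messages with the same role: pad with a blank turn.
        out.push({ role: msg.role === "user" ? "assistant" : "user", content: "" });
      }
      out.push(msg);
    }
    return out;
  }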
@@ -41,16 +73,7 @@ export class MlxOmniChatSdk {
   private static provider = new MlxOmniChatProvider();
 
   static async handleMlxOmniStream(
-    ctx: {
-      openai: OpenAI;
-      systemPrompt: any;
-      preprocessedContext: any;
-      maxTokens: unknown | number | undefined;
-      messages: any;
-      disableWebhookGeneration: boolean;
-      model: any;
-      env: Env;
-    },
+    ctx: any,
     dataCallback: (data: any) => any,
   ) {
     if (!ctx.messages?.length) {
@@ -62,10 +85,9 @@
         systemPrompt: ctx.systemPrompt,
         preprocessedContext: ctx.preprocessedContext,
         maxTokens: ctx.maxTokens,
-        messages: ctx.messages,
+        messages: Utils.normalizeWithBlanks(ctx.messages),
         model: ctx.model,
-        env: ctx.env,
-        disableWebhookGeneration: ctx.disableWebhookGeneration,
+        env: ctx.env
       },
       dataCallback,
     );
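For reference, an illustrative caller for handleMlxOmniStream, assuming a ctx object shaped like the fields the SDK forwards above and a dataCallback that writes SSE-style lines (the model id and the write target are assumptions, not part of this commit):

  // Illustrative usage sketch; ctx fields mirror what handleMlxOmniStream
  // forwards to the provider. The model id and SSE framing are hypothetical.
  async function streamChat(env: Env, write: (line: string) => void) {
    await MlxOmniChatSdk.handleMlxOmniStream(
      {
        systemPrompt: "You are a helpful assistant.",
        preprocessedContext: {},
        maxTokens: 1024,
        messages: [{ role: "user", content: "Hello" }],
        model: "mlx-community/Llama-3.1-8B-Instruct-4bit", // hypothetical model id
        env,
      },
      (data) => write(`data: ${JSON.stringify(data.data)}\n\n`),
    );
  }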