mirror of
https://github.com/geoffsee/open-gsio.git
synced 2025-09-08 22:56:46 +00:00
fix mlx omni provider
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
export type GenericEnv = Record<string, any>;
|
||||
|
||||
|
||||
export class ProviderRepository {
|
||||
#providers: {name: string, key: string, endpoint: string}[] = [];
|
||||
constructor(env: Record<string, any>) {
|
||||
#env: Record<string, any>;
|
||||
|
||||
constructor(env: GenericEnv) {
|
||||
this.#env = env
|
||||
this.setProviders(env);
|
||||
}
|
||||
|
||||
@@ -19,7 +24,8 @@ export class ProviderRepository {
|
||||
mlx: "http://localhost:10240/v1",
|
||||
}
|
||||
|
||||
static async getModelFamily(model, env: Env) {
|
||||
static async getModelFamily(model: any, env: Env) {
|
||||
console.log(env);
|
||||
const allModels = await env.KV_STORAGE.get("supportedModels");
|
||||
const models = JSON.parse(allModels);
|
||||
const modelData = models.filter(m => m.id === model)
|
||||
|
@@ -1,39 +1,71 @@
|
||||
import { OpenAI } from "openai";
|
||||
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
|
||||
import { Utils } from "../lib/utils";
|
||||
import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
|
||||
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
|
||||
|
||||
export class MlxOmniChatProvider extends BaseChatProvider {
|
||||
getOpenAIClient(param: CommonProviderParams): OpenAI {
|
||||
return new OpenAI({
|
||||
baseURL: param.env.MLX_API_ENDPOINT ?? "http://localhost:10240",
|
||||
baseURL: "http://localhost:10240",
|
||||
apiKey: param.env.MLX_API_KEY,
|
||||
});
|
||||
}
|
||||
|
||||
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
|
||||
const tuningParams = {
|
||||
temperature: 0.75,
|
||||
getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
|
||||
const baseTuningParams = {
|
||||
temperature: 0.86,
|
||||
top_p: 0.98,
|
||||
presence_penalty: 0.1,
|
||||
frequency_penalty: 0.3,
|
||||
max_tokens: param.maxTokens as number,
|
||||
};
|
||||
|
||||
const getTuningParams = () => {
|
||||
return tuningParams;
|
||||
return baseTuningParams;
|
||||
};
|
||||
|
||||
return {
|
||||
let completionRequest: ChatCompletionCreateParamsStreaming = {
|
||||
model: param.model,
|
||||
messages: safeMessages,
|
||||
stream: true,
|
||||
...getTuningParams(),
|
||||
messages: safeMessages
|
||||
};
|
||||
|
||||
const client = this.getOpenAIClient(param);
|
||||
const isLocal = client.baseURL.includes("localhost");
|
||||
|
||||
if(isLocal) {
|
||||
completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
|
||||
completionRequest["stream_options"] = {
|
||||
include_usage: true
|
||||
};
|
||||
} else {
|
||||
completionRequest = {...completionRequest, ...getTuningParams()};
|
||||
}
|
||||
|
||||
return completionRequest;
|
||||
}
|
||||
|
||||
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
|
||||
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
|
||||
dataCallback({ type: "chat", data: chunk });
|
||||
return true;
|
||||
const isLocal = chunk.usage !== undefined;
|
||||
|
||||
if (isLocal && chunk.usage) {
|
||||
dataCallback({
|
||||
type: "chat",
|
||||
data: {
|
||||
choices: [
|
||||
{
|
||||
delta: { content: "" },
|
||||
logprobs: null,
|
||||
finish_reason: "stop",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
return true; // Break the stream
|
||||
}
|
||||
|
||||
dataCallback({ type: "chat", data: chunk });
|
||||
return false;
|
||||
return false; // Continue the stream
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,16 +73,7 @@ export class MlxOmniChatSdk {
|
||||
private static provider = new MlxOmniChatProvider();
|
||||
|
||||
static async handleMlxOmniStream(
|
||||
ctx: {
|
||||
openai: OpenAI;
|
||||
systemPrompt: any;
|
||||
preprocessedContext: any;
|
||||
maxTokens: unknown | number | undefined;
|
||||
messages: any;
|
||||
disableWebhookGeneration: boolean;
|
||||
model: any;
|
||||
env: Env;
|
||||
},
|
||||
ctx: any,
|
||||
dataCallback: (data: any) => any,
|
||||
) {
|
||||
if (!ctx.messages?.length) {
|
||||
@@ -62,10 +85,9 @@ export class MlxOmniChatSdk {
|
||||
systemPrompt: ctx.systemPrompt,
|
||||
preprocessedContext: ctx.preprocessedContext,
|
||||
maxTokens: ctx.maxTokens,
|
||||
messages: ctx.messages,
|
||||
messages: Utils.normalizeWithBlanks(ctx.messages),
|
||||
model: ctx.model,
|
||||
env: ctx.env,
|
||||
disableWebhookGeneration: ctx.disableWebhookGeneration,
|
||||
env: ctx.env
|
||||
},
|
||||
dataCallback,
|
||||
);
|
||||
|
Reference in New Issue
Block a user