mirror of
https://github.com/geoffsee/open-gsio.git
synced 2025-09-08 22:56:46 +00:00
fix mlx omni provider
This commit is contained in:
1
bun.lock
1
bun.lock
@@ -85,6 +85,7 @@
|
|||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@anthropic-ai/sdk": "^0.32.1",
|
"@anthropic-ai/sdk": "^0.32.1",
|
||||||
"@cloudflare/workers-types": "^4.20241205.0",
|
"@cloudflare/workers-types": "^4.20241205.0",
|
||||||
|
"@open-gsio/client": "workspace:*",
|
||||||
"@open-gsio/env": "workspace:*",
|
"@open-gsio/env": "workspace:*",
|
||||||
"@testing-library/jest-dom": "^6.4.2",
|
"@testing-library/jest-dom": "^6.4.2",
|
||||||
"@testing-library/user-event": "^14.5.2",
|
"@testing-library/user-event": "^14.5.2",
|
||||||
|
3
packages/client/server/server.ts
Normal file
3
packages/client/server/server.ts
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
import { renderPage } from "vike/server";
|
||||||
|
|
||||||
|
export default renderPage;
|
@@ -1,5 +1,5 @@
|
|||||||
import { Router, withParams } from "itty-router";
|
import { Router, withParams } from "itty-router";
|
||||||
import { createRequestContext } from "./RequestContext.ts";
|
import { createRequestContext } from "./RequestContext";
|
||||||
|
|
||||||
export function createRouter() {
|
export function createRouter() {
|
||||||
return (
|
return (
|
||||||
@@ -57,13 +57,18 @@ export function createRouter() {
|
|||||||
// return documentService.handleGetDocument(r)
|
// return documentService.handleGetDocument(r)
|
||||||
// })
|
// })
|
||||||
|
|
||||||
.all("/api/metrics*", async (r, e, c) => {
|
.get("/api/metrics*", async (r, e, c) => {
|
||||||
|
const { metricsService } = createRequestContext(e, c);
|
||||||
|
return metricsService.handleMetricsRequest(r);
|
||||||
|
})
|
||||||
|
|
||||||
|
.post("/api/metrics*", async (r, e, c) => {
|
||||||
const { metricsService } = createRequestContext(e, c);
|
const { metricsService } = createRequestContext(e, c);
|
||||||
return metricsService.handleMetricsRequest(r);
|
return metricsService.handleMetricsRequest(r);
|
||||||
})
|
})
|
||||||
|
|
||||||
// renders the app
|
// renders the app
|
||||||
.get("^(?!/api/).*$", async (r, e, c) => {
|
.all("^(?!/api/)(?!/assets/).*$", async (r, e, c) => {
|
||||||
|
|
||||||
const { assetService } = createRequestContext(e, c);
|
const { assetService } = createRequestContext(e, c);
|
||||||
|
|
||||||
|
@@ -10,6 +10,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@open-gsio/env": "workspace:*",
|
"@open-gsio/env": "workspace:*",
|
||||||
|
"@open-gsio/client": "workspace:*",
|
||||||
"@anthropic-ai/sdk": "^0.32.1",
|
"@anthropic-ai/sdk": "^0.32.1",
|
||||||
"bun-sqlite-key-value": "^1.13.1",
|
"bun-sqlite-key-value": "^1.13.1",
|
||||||
"@cloudflare/workers-types": "^4.20241205.0",
|
"@cloudflare/workers-types": "^4.20241205.0",
|
||||||
|
@@ -1,7 +1,12 @@
|
|||||||
|
export type GenericEnv = Record<string, any>;
|
||||||
|
|
||||||
|
|
||||||
export class ProviderRepository {
|
export class ProviderRepository {
|
||||||
#providers: {name: string, key: string, endpoint: string}[] = [];
|
#providers: {name: string, key: string, endpoint: string}[] = [];
|
||||||
constructor(env: Record<string, any>) {
|
#env: Record<string, any>;
|
||||||
|
|
||||||
|
constructor(env: GenericEnv) {
|
||||||
|
this.#env = env
|
||||||
this.setProviders(env);
|
this.setProviders(env);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -19,7 +24,8 @@ export class ProviderRepository {
|
|||||||
mlx: "http://localhost:10240/v1",
|
mlx: "http://localhost:10240/v1",
|
||||||
}
|
}
|
||||||
|
|
||||||
static async getModelFamily(model, env: Env) {
|
static async getModelFamily(model: any, env: Env) {
|
||||||
|
console.log(env);
|
||||||
const allModels = await env.KV_STORAGE.get("supportedModels");
|
const allModels = await env.KV_STORAGE.get("supportedModels");
|
||||||
const models = JSON.parse(allModels);
|
const models = JSON.parse(allModels);
|
||||||
const modelData = models.filter(m => m.id === model)
|
const modelData = models.filter(m => m.id === model)
|
||||||
|
@@ -1,39 +1,71 @@
|
|||||||
import { OpenAI } from "openai";
|
import { OpenAI } from "openai";
|
||||||
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
|
import { Utils } from "../lib/utils";
|
||||||
|
import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
|
||||||
|
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
|
||||||
|
|
||||||
export class MlxOmniChatProvider extends BaseChatProvider {
|
export class MlxOmniChatProvider extends BaseChatProvider {
|
||||||
getOpenAIClient(param: CommonProviderParams): OpenAI {
|
getOpenAIClient(param: CommonProviderParams): OpenAI {
|
||||||
return new OpenAI({
|
return new OpenAI({
|
||||||
baseURL: param.env.MLX_API_ENDPOINT ?? "http://localhost:10240",
|
baseURL: "http://localhost:10240",
|
||||||
apiKey: param.env.MLX_API_KEY,
|
apiKey: param.env.MLX_API_KEY,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
|
getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
|
||||||
const tuningParams = {
|
const baseTuningParams = {
|
||||||
temperature: 0.75,
|
temperature: 0.86,
|
||||||
|
top_p: 0.98,
|
||||||
|
presence_penalty: 0.1,
|
||||||
|
frequency_penalty: 0.3,
|
||||||
|
max_tokens: param.maxTokens as number,
|
||||||
};
|
};
|
||||||
|
|
||||||
const getTuningParams = () => {
|
const getTuningParams = () => {
|
||||||
return tuningParams;
|
return baseTuningParams;
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
let completionRequest: ChatCompletionCreateParamsStreaming = {
|
||||||
model: param.model,
|
model: param.model,
|
||||||
messages: safeMessages,
|
|
||||||
stream: true,
|
stream: true,
|
||||||
...getTuningParams(),
|
messages: safeMessages
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const client = this.getOpenAIClient(param);
|
||||||
|
const isLocal = client.baseURL.includes("localhost");
|
||||||
|
|
||||||
|
if(isLocal) {
|
||||||
|
completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
|
||||||
|
completionRequest["stream_options"] = {
|
||||||
|
include_usage: true
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
completionRequest = {...completionRequest, ...getTuningParams()};
|
||||||
|
}
|
||||||
|
|
||||||
|
return completionRequest;
|
||||||
}
|
}
|
||||||
|
|
||||||
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
|
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
|
||||||
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
|
const isLocal = chunk.usage !== undefined;
|
||||||
dataCallback({ type: "chat", data: chunk });
|
|
||||||
return true;
|
if (isLocal && chunk.usage) {
|
||||||
|
dataCallback({
|
||||||
|
type: "chat",
|
||||||
|
data: {
|
||||||
|
choices: [
|
||||||
|
{
|
||||||
|
delta: { content: "" },
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: "stop",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
return true; // Break the stream
|
||||||
}
|
}
|
||||||
|
|
||||||
dataCallback({ type: "chat", data: chunk });
|
dataCallback({ type: "chat", data: chunk });
|
||||||
return false;
|
return false; // Continue the stream
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,16 +73,7 @@ export class MlxOmniChatSdk {
|
|||||||
private static provider = new MlxOmniChatProvider();
|
private static provider = new MlxOmniChatProvider();
|
||||||
|
|
||||||
static async handleMlxOmniStream(
|
static async handleMlxOmniStream(
|
||||||
ctx: {
|
ctx: any,
|
||||||
openai: OpenAI;
|
|
||||||
systemPrompt: any;
|
|
||||||
preprocessedContext: any;
|
|
||||||
maxTokens: unknown | number | undefined;
|
|
||||||
messages: any;
|
|
||||||
disableWebhookGeneration: boolean;
|
|
||||||
model: any;
|
|
||||||
env: Env;
|
|
||||||
},
|
|
||||||
dataCallback: (data: any) => any,
|
dataCallback: (data: any) => any,
|
||||||
) {
|
) {
|
||||||
if (!ctx.messages?.length) {
|
if (!ctx.messages?.length) {
|
||||||
@@ -62,10 +85,9 @@ export class MlxOmniChatSdk {
|
|||||||
systemPrompt: ctx.systemPrompt,
|
systemPrompt: ctx.systemPrompt,
|
||||||
preprocessedContext: ctx.preprocessedContext,
|
preprocessedContext: ctx.preprocessedContext,
|
||||||
maxTokens: ctx.maxTokens,
|
maxTokens: ctx.maxTokens,
|
||||||
messages: ctx.messages,
|
messages: Utils.normalizeWithBlanks(ctx.messages),
|
||||||
model: ctx.model,
|
model: ctx.model,
|
||||||
env: ctx.env,
|
env: ctx.env
|
||||||
disableWebhookGeneration: ctx.disableWebhookGeneration,
|
|
||||||
},
|
},
|
||||||
dataCallback,
|
dataCallback,
|
||||||
);
|
);
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
import { types } from "mobx-state-tree";
|
import { types } from "mobx-state-tree";
|
||||||
import { renderPage } from "vike/server";
|
|
||||||
|
|
||||||
export default types
|
export default types
|
||||||
.model("StaticAssetStore", {})
|
.model("StaticAssetStore", {})
|
||||||
@@ -17,7 +17,7 @@ export default types
|
|||||||
async handleSsr(
|
async handleSsr(
|
||||||
url: string,
|
url: string,
|
||||||
headers: Headers,
|
headers: Headers,
|
||||||
env: Vike.PageContext["env"],
|
env: Vike.PageContext.env,
|
||||||
) {
|
) {
|
||||||
console.log("handleSsr");
|
console.log("handleSsr");
|
||||||
const pageContextInit = {
|
const pageContextInit = {
|
||||||
|
@@ -13,7 +13,7 @@ import {XaiChatSdk} from "../providers/xai";
|
|||||||
import {CerebrasSdk} from "../providers/cerebras";
|
import {CerebrasSdk} from "../providers/cerebras";
|
||||||
import {CloudflareAISdk} from "../providers/cloudflareAi";
|
import {CloudflareAISdk} from "../providers/cloudflareAi";
|
||||||
import {OllamaChatSdk} from "../providers/ollama";
|
import {OllamaChatSdk} from "../providers/ollama";
|
||||||
import {MlxOmniChatSdk} from "../providers/mlx-omni";
|
import {MlxOmniChatProvider, MlxOmniChatSdk} from "../providers/mlx-omni";
|
||||||
import {ProviderRepository} from "../providers/_ProviderRepository";
|
import {ProviderRepository} from "../providers/_ProviderRepository";
|
||||||
|
|
||||||
export interface StreamParams {
|
export interface StreamParams {
|
||||||
@@ -126,7 +126,7 @@ const ChatService = types
|
|||||||
// ----- Helpers ----------------------------------------------------------
|
// ----- Helpers ----------------------------------------------------------
|
||||||
const logger = console;
|
const logger = console;
|
||||||
|
|
||||||
const useCache = false;
|
const useCache = true;
|
||||||
|
|
||||||
if(useCache) {
|
if(useCache) {
|
||||||
// ----- 1. Try cached value ---------------------------------------------
|
// ----- 1. Try cached value ---------------------------------------------
|
||||||
@@ -139,9 +139,10 @@ const ChatService = types
|
|||||||
return new Response(JSON.stringify(parsed), { status: 200 });
|
return new Response(JSON.stringify(parsed), { status: 200 });
|
||||||
}
|
}
|
||||||
logger.warn('Cache entry malformed – refreshing');
|
logger.warn('Cache entry malformed – refreshing');
|
||||||
|
throw new Error('Malformed cache entry');
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error('Error reading/parsing supportedModels cache', err);
|
logger.warn('Error reading/parsing supportedModels cache', err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -260,11 +261,8 @@ const ChatService = types
|
|||||||
}) {
|
}) {
|
||||||
const {streamConfig, streamParams, controller, encoder, streamId} = params;
|
const {streamConfig, streamParams, controller, encoder, streamId} = params;
|
||||||
|
|
||||||
const useModelFamily = () => {
|
|
||||||
return ProviderRepository.getModelFamily(streamConfig.model, self.env)
|
|
||||||
}
|
|
||||||
|
|
||||||
const modelFamily = await useModelFamily();
|
const modelFamily = await ProviderRepository.getModelFamily(streamConfig.model, self.env);
|
||||||
|
|
||||||
const useModelHandler = () => {
|
const useModelHandler = () => {
|
||||||
return modelHandlers[modelFamily]
|
return modelHandlers[modelFamily]
|
||||||
|
@@ -11,6 +11,5 @@
|
|||||||
"moduleResolution": "bundler",
|
"moduleResolution": "bundler",
|
||||||
"skipLibCheck": true,
|
"skipLibCheck": true,
|
||||||
"jsx": "react-jsx"
|
"jsx": "react-jsx"
|
||||||
},
|
}
|
||||||
"exclude": ["*.test.ts"]
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user