Mirror of https://github.com/geoffsee/predict-otron-9001.git, synced 2025-09-08 22:46:44 +00:00

Commit: fmt and clippy
@@ -1,10 +1,5 @@
 use async_openai::types::{CreateEmbeddingRequest, EmbeddingInput};
-use axum::{
-    Json, Router,
-    http::StatusCode,
-    response::Json as ResponseJson,
-    routing::post,
-};
+use axum::{Json, Router, http::StatusCode, response::Json as ResponseJson, routing::post};
 use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
 use once_cell::sync::Lazy;
 use serde::Serialize;
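The collapse of the `use axum::{...}` block onto a single line is plain `cargo fmt` behavior: a brace-grouped import stays on one line as long as it fits within `max_width` (100 columns by default), and is split one item per line once it does not. A minimal, std-only sketch of the same rule, with stand-in imports since axum itself is not needed to show it:

// Std-only stand-in for the axum import group above: rustfmt keeps a
// brace-grouped `use` on one line while it fits within `max_width`.
use std::collections::{BTreeMap, HashMap, HashSet};

fn main() {
    let _map: HashMap<u32, u32> = HashMap::new();
    let _set: HashSet<u32> = HashSet::new();
    let _tree: BTreeMap<u32, u32> = BTreeMap::new();
}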
@@ -8,7 +8,7 @@ pub mod server;
 // Re-export key components for easier access
 pub use inference::ModelInference;
 pub use model::{Model, Which};
-pub use server::{create_router, AppState};
+pub use server::{AppState, create_router};
 
 use std::env;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
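This reordering, like the matching ones in the server and runner imports further down, looks like rustfmt's style edition 2024 import sorting, which compares identifiers byte-wise so uppercase names sort ahead of lowercase ones. A hypothetical, self-contained sketch (the `server` module here is a stand-in, not the crate's real one):

// Hypothetical stand-in module, only to show the ordering: byte-wise
// comparison puts `AppState` ('A' = 0x41) before `create_router`
// ('c' = 0x63) inside the brace group.
mod server {
    pub struct AppState;
    pub fn create_router() -> &'static str {
        "router"
    }
}

pub use server::{AppState, create_router};

fn main() {
    let _state = AppState;
    println!("{}", create_router());
}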
@@ -1,23 +1,26 @@
-use inference_engine::{create_router, init_tracing, get_server_config, AppState};
+use inference_engine::{AppState, create_router, get_server_config, init_tracing};
 use tokio::net::TcpListener;
 use tracing::info;
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     init_tracing();
 
     let app_state = AppState::default();
     let app = create_router(app_state);
 
     let (server_host, server_port, server_address) = get_server_config();
     let listener = TcpListener::bind(&server_address).await?;
 
-    info!("Inference Engine server starting on http://{}", server_address);
+    info!(
+        "Inference Engine server starting on http://{}",
+        server_address
+    );
     info!("Available endpoints:");
     info!(" POST /v1/chat/completions - OpenAI-compatible chat completions");
     info!(" GET /v1/models - List available models");
 
     axum::serve(listener, app).await?;
 
     Ok(())
 }
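The `info!` call is rewrapped because, with its arguments, it no longer fits in rustfmt's default 100-column `max_width`; each argument then goes on its own line. A dependency-free sketch of the same wrapping, with `println!` standing in for `tracing::info!`:

// `println!` stands in for `tracing::info!` so this runs without extra
// dependencies; the wrapping rustfmt applies is the same.
fn main() {
    let server_address = "0.0.0.0:8080";
    println!(
        "Inference Engine server starting on http://{}",
        server_address
    );
}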
@@ -1,28 +1,28 @@
 use axum::{
+    Json, Router,
     extract::State,
     http::StatusCode,
-    response::{sse::Event, sse::Sse, IntoResponse},
+    response::{IntoResponse, sse::Event, sse::Sse},
     routing::{get, post},
-    Json, Router,
 };
 use futures_util::stream::{self, Stream};
 use std::convert::Infallible;
 use std::str::FromStr;
 use std::sync::Arc;
-use tokio::sync::{mpsc, Mutex};
+use tokio::sync::{Mutex, mpsc};
 use tokio_stream::wrappers::UnboundedReceiverStream;
 use tower_http::cors::{Any, CorsLayer};
 use uuid::Uuid;
 
+use crate::Which;
 use crate::openai_types::{
     ChatCompletionChoice, ChatCompletionChunk, ChatCompletionChunkChoice, ChatCompletionRequest,
     ChatCompletionResponse, Delta, Message, MessageContent, Model, ModelListResponse, Usage,
 };
-use crate::Which;
 use either::Either;
 use embeddings_engine::models_list;
-use gemma_runner::{run_gemma_api, GemmaInferenceConfig, WhichModel};
-use llama_runner::{run_llama_inference, LlamaInferenceConfig};
+use gemma_runner::{GemmaInferenceConfig, WhichModel, run_gemma_api};
+use llama_runner::{LlamaInferenceConfig, run_llama_inference};
 use serde_json::Value;
 // -------------------------
 // Shared app state
@@ -45,8 +45,6 @@ pub struct Services {
     pub embeddings_url: Option<String>,
 }
 
-
-
 impl Default for ServerConfig {
     fn default() -> Self {
         Self {
@@ -14,12 +14,12 @@ use std::env;
 #[cfg(feature = "ui")]
 use axum::http::StatusCode as AxumStatusCode;
 #[cfg(feature = "ui")]
+use axum::http::Uri;
+#[cfg(feature = "ui")]
 use axum::http::header;
 #[cfg(feature = "ui")]
 use axum::response::IntoResponse;
 #[cfg(feature = "ui")]
-use axum::http::Uri;
-#[cfg(feature = "ui")]
 use mime_guess::from_path;
 #[cfg(feature = "ui")]
 use rust_embed::Embed;
@@ -28,7 +28,6 @@ use tower_http::cors::{Any, CorsLayer};
 use tower_http::trace::TraceLayer;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
-
 #[cfg(feature = "ui")]
 #[derive(Embed)]
 #[folder = "../../target/site"]
@@ -24,8 +24,7 @@ fn run_build() -> io::Result<()> {
     let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
     let output_path = out_dir.join("client-cli");
 
-    let bun_tgt = BunTarget::from_cargo_env()
-        .map_err(|e| io::Error::other(e.to_string()))?;
+    let bun_tgt = BunTarget::from_cargo_env().map_err(|e| io::Error::other(e.to_string()))?;
 
     // Optional: warn if using a Bun target that’s marked unsupported in your chart
     if matches!(bun_tgt, BunTarget::WindowsArm64) {
@@ -54,9 +53,9 @@ fn run_build() -> io::Result<()> {
 
     if !install_status.success() {
         let code = install_status.code().unwrap_or(1);
-        return Err(io::Error::other(
-            format!("bun install failed with status {code}"),
-        ));
+        return Err(io::Error::other(format!(
+            "bun install failed with status {code}"
+        )));
     }
 
     let _target = env::var("TARGET").unwrap();
@@ -25,7 +25,7 @@ fn main() -> io::Result<()> {
     // Run it
     let status = Command::new(&tmp).arg("--version").status()?;
     if !status.success() {
-        return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
+        return Err(io::Error::other("client-cli failed"));
     }
 
     Ok(())
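This hunk is a clippy fix rather than pure formatting: `io::Error::other(msg)` has been a shorthand for `io::Error::new(io::ErrorKind::Other, msg)` since Rust 1.74, and clippy (most likely its `io_other_error` lint) suggests the shorter form. A small runnable sketch showing the two are equivalent:

use std::io;

// `io::Error::other` (stable since Rust 1.74) is shorthand for
// `io::Error::new(io::ErrorKind::Other, ...)`; both produce the same kind.
fn old_style() -> io::Result<()> {
    Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"))
}

fn new_style() -> io::Result<()> {
    Err(io::Error::other("client-cli failed"))
}

fn main() {
    assert_eq!(
        old_style().unwrap_err().kind(),
        new_style().unwrap_err().kind()
    );
    println!("both are ErrorKind::Other");
}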
@@ -205,7 +205,7 @@ impl TextGeneration {
 
         let start_gen = std::time::Instant::now();
 
-        for (_generated_tokens, index) in (0..sample_len).enumerate() {
+        for index in 0..sample_len {
             let context_size = if index > 0 { 1 } else { tokens.len() };
             let start_pos = tokens.len().saturating_sub(context_size);
             let ctxt = &tokens[start_pos..];
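Here the `.enumerate()` counter was bound to `_generated_tokens` and never used, so iterating the range directly is equivalent; clippy flags this pattern (its `unused_enumerate_index` lint, as far as I can tell). A self-contained sketch with stand-in values for `sample_len` and `tokens`:

// `sample_len` and `tokens` are stand-ins for the TextGeneration fields;
// dropping the unused enumerate counter leaves the loop body unchanged.
fn main() {
    let sample_len = 4usize;
    let tokens = vec![10u32, 11, 12];

    for index in 0..sample_len {
        let context_size = if index > 0 { 1 } else { tokens.len() };
        let start_pos = tokens.len().saturating_sub(context_size);
        let ctxt = &tokens[start_pos..];
        println!("step {index}: context {ctxt:?}");
    }
}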
@@ -6,10 +6,8 @@ mod gemma_api;
 mod gemma_cli;
 
 use anyhow::Error;
-use clap::{Parser, ValueEnum};
 
 use crate::gemma_cli::run_cli;
-use std::io::Write;
 
 /// just a placeholder, not used for anything
 fn main() -> std::result::Result<(), Error> {
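This hunk, like the llama_runner one further down, only deletes imports nothing in the file references any more; rustc's `unused_imports` warning (also surfaced by `cargo clippy`) points at them, and removing the lines is the whole fix. A trivial sketch that turns the same warning into a hard error:

// Deny the lint this hunk silences; uncommenting the import below would
// reproduce the `unused_imports` diagnostic that prompted the deletion.
#![deny(unused_imports)]

// use std::io::Write;

fn main() {
    println!("no remaining use of clap or std::io::Write in this file");
}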
@@ -28,7 +28,6 @@ struct KubeMetadata {
     port: u16,
 }
 
-
 #[derive(Debug, Clone)]
 struct ServiceInfo {
     name: String,
@@ -6,9 +6,6 @@ mod llama_api;
 mod llama_cli;
 
 use anyhow::Result;
-use clap::{Parser, ValueEnum};
-
-use std::io::Write;
 
 use crate::llama_cli::run_cli;
 
@@ -124,10 +124,7 @@ pub fn hub_load_safetensors(
     }
     let safetensors_files = safetensors_files
         .iter()
-        .map(|v| {
-            repo.get(v)
-                .map_err(|e| std::io::Error::other(e))
-        })
+        .map(|v| repo.get(v).map_err(std::io::Error::other))
         .collect::<Result<Vec<_>, std::io::Error>>()?;
     Ok(safetensors_files)
 }
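Two things happen in that last hunk: the closure body is short enough to fold onto one line, and `|e| std::io::Error::other(e)` collapses to the bare function path, which is what clippy's `redundant_closure` lint suggests. A self-contained sketch of the same shape, with a plain fallible `get` standing in for `repo.get`:

use std::io;

// Stand-in for `repo.get(v)`: any function returning a Result with a
// non-io error type is enough to show the map_err(io::Error::other) fold.
fn get(name: &str) -> Result<String, String> {
    if name.is_empty() {
        Err("empty name".to_string())
    } else {
        Ok(format!("/cache/{name}"))
    }
}

fn main() -> io::Result<()> {
    let files = ["model-00001.safetensors", "model-00002.safetensors"];
    let paths = files
        .iter()
        // `io::Error::other` is passed directly instead of `|e| io::Error::other(e)`.
        .map(|v| get(v).map_err(io::Error::other))
        .collect::<Result<Vec<_>, io::Error>>()?;
    println!("{paths:?}");
    Ok(())
}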