mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
fix clippy errors
This commit is contained in:
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Clippy
|
||||
shell: bash
|
||||
run: cargo clippy --all-targets
|
||||
run: cargo clippy --all
|
||||
|
||||
- name: Tests
|
||||
shell: bash
|
||||
|
@@ -3,7 +3,7 @@ use axum::{
|
||||
Json, Router,
|
||||
http::StatusCode,
|
||||
response::Json as ResponseJson,
|
||||
routing::{get, post},
|
||||
routing::post,
|
||||
};
|
||||
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -11,7 +11,6 @@ use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing;
|
||||
|
||||
// Cache for multiple embedding models
|
||||
static MODEL_CACHE: Lazy<RwLock<HashMap<EmbeddingModel, Arc<TextEmbedding>>>> =
|
||||
|
@@ -39,28 +39,13 @@ impl Default for ServerMode {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
|
||||
pub struct Services {
|
||||
pub inference_url: Option<String>,
|
||||
pub embeddings_url: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for Services {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inference_url: None,
|
||||
embeddings_url: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn inference_service_url() -> String {
|
||||
"http://inference-service:8080".to_string()
|
||||
}
|
||||
|
||||
fn embeddings_service_url() -> String {
|
||||
"http://embeddings-service:8080".to_string()
|
||||
}
|
||||
|
||||
impl Default for ServerConfig {
|
||||
fn default() -> Self {
|
||||
@@ -118,8 +103,7 @@ impl ServerConfig {
|
||||
"HighAvailability mode configured but services not well defined! \n## Config Used:\n {}",
|
||||
config_string
|
||||
);
|
||||
let err = std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
let err = std::io::Error::other(
|
||||
"HighAvailability mode configured but services not well defined!",
|
||||
);
|
||||
return Err(err);
|
||||
|
@@ -126,7 +126,7 @@ use crate::config::ServerConfig;
|
||||
/// - Pretty JSON is fine in TOML using `''' ... '''`, but remember the newlines are part of the string.
|
||||
/// - If you control the consumer, TOML tables (the alternative above) are more ergonomic than embedding JSON.
|
||||
|
||||
/// HTTP client configured for proxying requests
|
||||
/// HTTP client configured for proxying requests
|
||||
#[derive(Clone)]
|
||||
pub struct ProxyClient {
|
||||
client: Client,
|
||||
|
@@ -4,12 +4,11 @@ mod middleware;
|
||||
mod standalone_mode;
|
||||
|
||||
use crate::standalone_mode::create_standalone_router;
|
||||
use axum::handler::Handler;
|
||||
use axum::http::StatusCode as AxumStatusCode;
|
||||
use axum::http::header;
|
||||
use axum::response::IntoResponse;
|
||||
use axum::routing::get;
|
||||
use axum::{Router, ServiceExt, http::Uri, response::Html, serve};
|
||||
use axum::{Router, http::Uri, response::Html, serve};
|
||||
use config::ServerConfig;
|
||||
use ha_mode::create_ha_router;
|
||||
use inference_engine::AppState;
|
||||
@@ -127,7 +126,7 @@ async fn main() {
|
||||
.layer(TraceLayer::new_for_http());
|
||||
|
||||
// Server configuration
|
||||
let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| String::from(default_host));
|
||||
let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| default_host.to_string());
|
||||
|
||||
let server_port = env::var("SERVER_PORT")
|
||||
.map(|v| v.parse::<u16>().unwrap_or(default_port))
|
||||
|
@@ -2,7 +2,7 @@ use crate::config::ServerConfig;
|
||||
use axum::Router;
|
||||
use inference_engine::AppState;
|
||||
|
||||
pub fn create_standalone_router(server_config: ServerConfig) -> Router {
|
||||
pub fn create_standalone_router(_server_config: ServerConfig) -> Router {
|
||||
// Create unified router by merging embeddings and inference routers (existing behavior)
|
||||
let embeddings_router = embeddings_engine::create_embeddings_router();
|
||||
|
||||
|
@@ -25,7 +25,7 @@ fn run_build() -> io::Result<()> {
|
||||
let output_path = out_dir.join("client-cli");
|
||||
|
||||
let bun_tgt = BunTarget::from_cargo_env()
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
||||
.map_err(|e| io::Error::other(e.to_string()))?;
|
||||
|
||||
// Optional: warn if using a Bun target that’s marked unsupported in your chart
|
||||
if matches!(bun_tgt, BunTarget::WindowsArm64) {
|
||||
@@ -54,13 +54,12 @@ fn run_build() -> io::Result<()> {
|
||||
|
||||
if !install_status.success() {
|
||||
let code = install_status.code().unwrap_or(1);
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
return Err(io::Error::other(
|
||||
format!("bun install failed with status {code}"),
|
||||
));
|
||||
}
|
||||
|
||||
let target = env::var("TARGET").unwrap();
|
||||
let _target = env::var("TARGET").unwrap();
|
||||
|
||||
// --- bun build (in ./package), emit to OUT_DIR, keep temps inside OUT_DIR ---
|
||||
let mut build = Command::new("bun")
|
||||
@@ -87,7 +86,7 @@ fn run_build() -> io::Result<()> {
|
||||
} else {
|
||||
let code = status.code().unwrap_or(1);
|
||||
warn(&format!("bun build failed with status: {code}"));
|
||||
return Err(io::Error::new(io::ErrorKind::Other, "bun build failed"));
|
||||
return Err(io::Error::other("bun build failed"));
|
||||
}
|
||||
|
||||
// Ensure the output is executable (after it exists)
|
||||
|
@@ -191,8 +191,6 @@ impl TextGeneration {
|
||||
// Make sure stdout isn't holding anything (if caller also prints).
|
||||
std::io::stdout().flush()?;
|
||||
|
||||
let mut _generated_tokens = 0usize;
|
||||
|
||||
let eos_token = match self.tokenizer.get_token("<eos>") {
|
||||
Some(token) => token,
|
||||
None => anyhow::bail!("cannot find the <eos> token"),
|
||||
@@ -207,7 +205,7 @@ impl TextGeneration {
|
||||
|
||||
let start_gen = std::time::Instant::now();
|
||||
|
||||
for index in 0..sample_len {
|
||||
for (_generated_tokens, index) in (0..sample_len).enumerate() {
|
||||
let context_size = if index > 0 { 1 } else { tokens.len() };
|
||||
let start_pos = tokens.len().saturating_sub(context_size);
|
||||
let ctxt = &tokens[start_pos..];
|
||||
@@ -229,7 +227,6 @@ impl TextGeneration {
|
||||
|
||||
let next_token = self.logits_processor.sample(&logits)?;
|
||||
tokens.push(next_token);
|
||||
_generated_tokens += 1;
|
||||
|
||||
if next_token == eos_token || next_token == eot_token {
|
||||
break;
|
||||
|
@@ -67,7 +67,7 @@ pub fn run_cli() -> anyhow::Result<()> {
|
||||
let cfg = GemmaInferenceConfig {
|
||||
tracing: args.tracing,
|
||||
prompt: args.prompt,
|
||||
model: args.model,
|
||||
model: Some(args.model),
|
||||
cpu: args.cpu,
|
||||
dtype: args.dtype,
|
||||
model_id: args.model_id,
|
||||
|
@@ -1,6 +1,5 @@
|
||||
pub mod llama_api;
|
||||
|
||||
use clap::ValueEnum;
|
||||
pub use llama_api::{run_llama_inference, LlamaInferenceConfig, WhichModel};
|
||||
|
||||
// Re-export constants and types that might be needed
|
||||
|
@@ -126,7 +126,7 @@ pub fn hub_load_safetensors(
|
||||
.iter()
|
||||
.map(|v| {
|
||||
repo.get(v)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
|
||||
.map_err(|e| std::io::Error::other(e))
|
||||
})
|
||||
.collect::<Result<Vec<_>, std::io::Error>>()?;
|
||||
Ok(safetensors_files)
|
||||
|
Reference in New Issue
Block a user