- Refactored `build_pipeline` usage to clone the pipeline arguments before the call, so they remain available for storage in `AppState`.

- Introduced `reset_state` for clearing cached state between requests.
- Enhanced chat UI with model selector and dynamic model fetching.
- Improved error logging and detailed debug messages for chat request flows.
- Added fresh instantiation of `TextGeneration` to prevent tensor shape mismatches.
This commit is contained in:
geoffsee
2025-08-27 17:53:50 -04:00
parent f1b57866e1
commit 766d41af78
5 changed files with 185 additions and 209 deletions

View File

@@ -53,10 +53,11 @@ async fn main() {
pipeline_args.model_id = "google/gemma-3-1b-it".to_string();
pipeline_args.which = Which::InstructV3_1B;
let text_generation = build_pipeline(pipeline_args);
let text_generation = build_pipeline(pipeline_args.clone());
let app_state = AppState {
text_generation: std::sync::Arc::new(tokio::sync::Mutex::new(text_generation)),
model_id: "google/gemma-3-1b-it".to_string(),
build_args: pipeline_args,
};
// Get the inference router directly from the inference engine