Mirror of https://github.com/geoffsee/predict-otron-9001.git, synced 2025-09-08 22:46:44 +00:00.
- Refactored `build_pipeline` usage to ensure pipeline arguments are cloned.
- Introduced `reset_state` for clearing cached state between requests.
- Enhanced chat UI with a model selector and dynamic model fetching.
- Improved error logging and detailed debug messages for chat request flows.
- Added fresh instantiation of `TextGeneration` to prevent tensor shape mismatches.
This commit is contained in:
@@ -53,10 +53,11 @@ async fn main() {
     pipeline_args.model_id = "google/gemma-3-1b-it".to_string();
     pipeline_args.which = Which::InstructV3_1B;

-    let text_generation = build_pipeline(pipeline_args);
+    let text_generation = build_pipeline(pipeline_args.clone());
     let app_state = AppState {
         text_generation: std::sync::Arc::new(tokio::sync::Mutex::new(text_generation)),
         model_id: "google/gemma-3-1b-it".to_string(),
+        build_args: pipeline_args,
     };

     // Get the inference router directly from the inference engine
Reference in New Issue
Block a user