mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
streaming implementation re-added to UI
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -893,6 +893,7 @@ dependencies = [
|
|||||||
"axum",
|
"axum",
|
||||||
"console_error_panic_hook",
|
"console_error_panic_hook",
|
||||||
"gloo-net",
|
"gloo-net",
|
||||||
|
"js-sys",
|
||||||
"leptos",
|
"leptos",
|
||||||
"leptos_axum",
|
"leptos_axum",
|
||||||
"leptos_meta",
|
"leptos_meta",
|
||||||
@@ -902,6 +903,7 @@ dependencies = [
|
|||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
"web-sys",
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -15,10 +15,26 @@ leptos_axum = { version = "0.8.0", optional = true }
|
|||||||
leptos_meta = { version = "0.8.0" }
|
leptos_meta = { version = "0.8.0" }
|
||||||
tokio = { version = "1", features = ["rt-multi-thread"], optional = true }
|
tokio = { version = "1", features = ["rt-multi-thread"], optional = true }
|
||||||
wasm-bindgen = { version = "=0.2.100", optional = true }
|
wasm-bindgen = { version = "=0.2.100", optional = true }
|
||||||
|
wasm-bindgen-futures = "0.4"
|
||||||
|
js-sys = "0.3"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
reqwest = { version = "0.12", features = ["json"] }
|
reqwest = { version = "0.12", features = ["json"] }
|
||||||
web-sys = { version = "0.3", features = ["console"] }
|
web-sys = { version = "0.3", features = [
|
||||||
|
"console",
|
||||||
|
"EventSource",
|
||||||
|
"MessageEvent",
|
||||||
|
"Window",
|
||||||
|
"Request",
|
||||||
|
"RequestInit",
|
||||||
|
"Response",
|
||||||
|
"Headers",
|
||||||
|
"ReadableStream",
|
||||||
|
"ReadableStreamDefaultReader",
|
||||||
|
"TextDecoder",
|
||||||
|
"TextDecoderOptions",
|
||||||
|
"HtmlInputElement"
|
||||||
|
] }
|
||||||
gloo-net = { version = "0.6", features = ["http"] }
|
gloo-net = { version = "0.6", features = ["http"] }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
@@ -1,24 +0,0 @@
|
|||||||
This is free and unencumbered software released into the public domain.
|
|
||||||
|
|
||||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
|
||||||
distribute this software, either in source code form or as a compiled
|
|
||||||
binary, for any purpose, commercial or non-commercial, and by any
|
|
||||||
means.
|
|
||||||
|
|
||||||
In jurisdictions that recognize copyright laws, the author or authors
|
|
||||||
of this software dedicate any and all copyright interest in the
|
|
||||||
software to the public domain. We make this dedication for the benefit
|
|
||||||
of the public at large and to the detriment of our heirs and
|
|
||||||
successors. We intend this dedication to be an overt act of
|
|
||||||
relinquishment in perpetuity of all present and future rights to this
|
|
||||||
software under copyright law.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
||||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
||||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
For more information, please refer to <https://unlicense.org>
|
|
@@ -49,7 +49,6 @@ use leptos_router::{
|
|||||||
};
|
};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use web_sys::console;
|
use web_sys::console;
|
||||||
// Remove spawn_local import as we'll use different approach
|
|
||||||
|
|
||||||
// Data structures for OpenAI-compatible API
|
// Data structures for OpenAI-compatible API
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -73,6 +72,29 @@ pub struct ChatChoice {
|
|||||||
pub finish_reason: Option<String>,
|
pub finish_reason: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Streaming response structures
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct StreamDelta {
|
||||||
|
pub role: Option<String>,
|
||||||
|
pub content: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct StreamChoice {
|
||||||
|
pub index: u32,
|
||||||
|
pub delta: StreamDelta,
|
||||||
|
pub finish_reason: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct StreamChatResponse {
|
||||||
|
pub id: String,
|
||||||
|
pub object: String,
|
||||||
|
pub created: u64,
|
||||||
|
pub model: String,
|
||||||
|
pub choices: Vec<StreamChoice>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
pub struct ChatResponse {
|
pub struct ChatResponse {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
@@ -161,6 +183,129 @@ pub async fn send_chat_completion(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Streaming chat completion over `fetch` (EventSource cannot issue POST requests)

/// Returns the payload of a server-sent-events `data` line, or `None` for any
/// other kind of line.
///
/// The single space after the colon is optional, so `data: x` and `data:x`
/// both yield `x`.
#[cfg_attr(not(target_arch = "wasm32"), allow(dead_code))]
fn sse_data_payload(line: &str) -> Option<&str> {
    let payload = line.strip_prefix("data:")?;
    Some(payload.strip_prefix(' ').unwrap_or(payload))
}

/// Removes every newline-terminated line from `buffer` and returns them as
/// text, leaving any trailing partial line in place for the next read.
///
/// Working on raw bytes keeps a multi-byte UTF-8 character that is split
/// across two network chunks intact: the byte `\n` never occurs inside a
/// multi-byte sequence, so text is only decoded once the whole line is here.
/// Both `\n` and `\r\n` line endings are accepted.
#[cfg_attr(not(target_arch = "wasm32"), allow(dead_code))]
fn drain_complete_lines(buffer: &mut Vec<u8>) -> Vec<String> {
    let Some(last_newline) = buffer.iter().rposition(|&byte| byte == b'\n') else {
        return Vec::new();
    };
    let complete: Vec<u8> = buffer.drain(..=last_newline).collect();
    String::from_utf8_lossy(&complete)
        .lines()
        .map(str::to_owned)
        .collect()
}

/// Performs the POST request and feeds every streamed content fragment to
/// `on_chunk`.
///
/// Returns `Ok(())` when the stream finishes, either through the `[DONE]`
/// sentinel or because the body ended, and `Err(message)` on any failure.
#[cfg(target_arch = "wasm32")]
async fn stream_chat_chunks(
    request_json: String,
    on_chunk: &impl Fn(String),
) -> Result<(), String> {
    use wasm_bindgen::{JsCast, JsValue};
    use wasm_bindgen_futures::JsFuture;

    let window = web_sys::window().ok_or_else(|| "No browser window available".to_string())?;

    let headers = web_sys::Headers::new().map_err(|e| format!("Request error: {:?}", e))?;
    headers
        .set("Content-Type", "application/json")
        .map_err(|e| format!("Request error: {:?}", e))?;
    headers
        .set("Accept", "text/event-stream")
        .map_err(|e| format!("Request error: {:?}", e))?;

    let opts = web_sys::RequestInit::new();
    opts.set_method("POST");
    opts.set_body(&JsValue::from_str(&request_json));
    opts.set_headers(&headers);

    let request = web_sys::Request::new_with_str_and_init("/v1/chat/completions", &opts)
        .map_err(|e| format!("Request error: {:?}", e))?;

    let resp_value = JsFuture::from(window.fetch_with_request(&request))
        .await
        .map_err(|e| format!("Fetch error: {:?}", e))?;
    let resp: web_sys::Response = resp_value
        .dyn_into()
        .map_err(|e| format!("Fetch error: {:?}", e))?;

    if !resp.ok() {
        return Err(format!("Server error: {}", resp.status()));
    }

    let body = resp
        .body()
        .ok_or_else(|| "No response body".to_string())?;
    let reader = body
        .get_reader()
        .dyn_into::<web_sys::ReadableStreamDefaultReader>()
        .map_err(|e| format!("Read error: {:?}", e))?;

    // Handles one line of the event stream; returns true once `[DONE]` is seen.
    let handle_line = |line: &str| -> bool {
        let Some(data) = sse_data_payload(line) else {
            return false;
        };
        if data == "[DONE]" {
            return true;
        }
        // Payloads that are not valid chunks are skipped (best effort).
        if let Ok(chunk) = serde_json::from_str::<StreamChatResponse>(data) {
            let content = chunk
                .choices
                .into_iter()
                .next()
                .and_then(|choice| choice.delta.content);
            if let Some(content) = content {
                on_chunk(content);
            }
        }
        false
    };

    let mut pending: Vec<u8> = Vec::new();

    loop {
        let result = JsFuture::from(reader.read())
            .await
            .map_err(|e| format!("Read error: {:?}", e))?;

        let done = js_sys::Reflect::get(&result, &JsValue::from_str("done"))
            .map_err(|e| format!("Read error: {:?}", e))?
            .as_bool()
            .unwrap_or(false);
        if done {
            break;
        }

        let value = js_sys::Reflect::get(&result, &JsValue::from_str("value"))
            .map_err(|e| format!("Read error: {:?}", e))?;
        pending.extend(js_sys::Uint8Array::new(&value).to_vec());

        for line in drain_complete_lines(&mut pending) {
            if handle_line(&line) {
                return Ok(());
            }
        }
    }

    // The body ended without a final newline: flush what is left.
    if !pending.is_empty() {
        pending.push(b'\n');
        for line in drain_complete_lines(&mut pending) {
            if handle_line(&line) {
                break;
            }
        }
    }

    Ok(())
}

/// Sends a streaming chat completion request and reports progress through
/// callbacks.
///
/// The request is POSTed to `/v1/chat/completions` with `stream: true`, and
/// the response body is read incrementally as a server-sent-events stream.
///
/// * `messages` - conversation sent to the model.
/// * `model` - identifier of the model to use.
/// * `on_chunk` - called with each content fragment as it arrives.
/// * `on_complete` - called once when the stream ends successfully.
/// * `on_error` - called once with a description when anything fails.
///
/// Exactly one of `on_complete` / `on_error` is invoked per call, and no
/// failure in the browser APIs panics.
#[cfg(target_arch = "wasm32")]
pub fn send_chat_completion_stream(
    messages: Vec<ChatMessage>,
    model: String,
    on_chunk: impl Fn(String) + 'static,
    on_complete: impl Fn() + 'static,
    on_error: impl Fn(String) + 'static,
) {
    let request = ChatRequest {
        model,
        messages,
        max_tokens: Some(1024),
        stream: Some(true),
    };

    let request_json = match serde_json::to_string(&request) {
        Ok(json) => json,
        Err(e) => {
            on_error(format!("Failed to encode request: {}", e));
            return;
        }
    };

    wasm_bindgen_futures::spawn_local(async move {
        match stream_chat_chunks(request_json, &on_chunk).await {
            Ok(()) => on_complete(),
            Err(message) => on_error(message),
        }
    });
}
|
||||||
|
|
||||||
pub fn shell(options: LeptosOptions) -> impl IntoView {
|
pub fn shell(options: LeptosOptions) -> impl IntoView {
|
||||||
view! {
|
view! {
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
@@ -221,6 +366,13 @@ fn ChatPage() -> impl IntoView {
|
|||||||
// State for available models and selected model
|
// State for available models and selected model
|
||||||
let available_models = RwSignal::new(Vec::<ModelInfo>::new());
|
let available_models = RwSignal::new(Vec::<ModelInfo>::new());
|
||||||
let selected_model = RwSignal::new(String::from("gemma-3-1b-it")); // Default model
|
let selected_model = RwSignal::new(String::from("gemma-3-1b-it")); // Default model
|
||||||
|
|
||||||
|
// State for streaming response
|
||||||
|
let streaming_content = RwSignal::new(String::new());
|
||||||
|
let is_streaming = RwSignal::new(false);
|
||||||
|
|
||||||
|
// State for streaming mode toggle
|
||||||
|
let use_streaming = RwSignal::new(true); // Default to streaming
|
||||||
|
|
||||||
// Client-side only: Fetch models on component mount
|
// Client-side only: Fetch models on component mount
|
||||||
#[cfg(target_arch = "wasm32")]
|
#[cfg(target_arch = "wasm32")]
|
||||||
@@ -265,25 +417,63 @@ fn ChatPage() -> impl IntoView {
|
|||||||
// Prepare messages for API call
|
// Prepare messages for API call
|
||||||
let current_messages = messages.get();
|
let current_messages = messages.get();
|
||||||
let current_model = selected_model.get();
|
let current_model = selected_model.get();
|
||||||
|
let should_stream = use_streaming.get();
|
||||||
|
|
||||||
// Spawn async task to call API
|
if should_stream {
|
||||||
spawn_local(async move {
|
// Clear streaming content and set streaming flag
|
||||||
match send_chat_completion(current_messages, current_model).await {
|
streaming_content.set(String::new());
|
||||||
Ok(response_content) => {
|
is_streaming.set(true);
|
||||||
let assistant_message = ChatMessage {
|
|
||||||
role: "assistant".to_string(),
|
// Use streaming API
|
||||||
content: response_content,
|
send_chat_completion_stream(
|
||||||
};
|
current_messages,
|
||||||
messages.update(|msgs| msgs.push(assistant_message));
|
current_model,
|
||||||
|
move |chunk| {
|
||||||
|
// Append chunk to streaming content
|
||||||
|
streaming_content.update(|content| content.push_str(&chunk));
|
||||||
|
},
|
||||||
|
move || {
|
||||||
|
// On complete, move streaming content to messages
|
||||||
|
let final_content = streaming_content.get();
|
||||||
|
if !final_content.is_empty() {
|
||||||
|
let assistant_message = ChatMessage {
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
content: final_content,
|
||||||
|
};
|
||||||
|
messages.update(|msgs| msgs.push(assistant_message));
|
||||||
|
}
|
||||||
|
streaming_content.set(String::new());
|
||||||
|
is_streaming.set(false);
|
||||||
is_loading.set(false);
|
is_loading.set(false);
|
||||||
}
|
},
|
||||||
Err(error) => {
|
move |error| {
|
||||||
console::log_1(&format!("API Error: {}", error).into());
|
console::log_1(&format!("Streaming Error: {}", error).into());
|
||||||
error_message.set(Some(error));
|
error_message.set(Some(error));
|
||||||
|
is_streaming.set(false);
|
||||||
is_loading.set(false);
|
is_loading.set(false);
|
||||||
|
streaming_content.set(String::new());
|
||||||
|
},
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// Use non-streaming API
|
||||||
|
spawn_local(async move {
|
||||||
|
match send_chat_completion(current_messages, current_model).await {
|
||||||
|
Ok(response_content) => {
|
||||||
|
let assistant_message = ChatMessage {
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
content: response_content,
|
||||||
|
};
|
||||||
|
messages.update(|msgs| msgs.push(assistant_message));
|
||||||
|
is_loading.set(false);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
console::log_1(&format!("API Error: {}", error).into());
|
||||||
|
error_message.set(Some(error));
|
||||||
|
is_loading.set(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
});
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -329,6 +519,19 @@ fn ChatPage() -> impl IntoView {
|
|||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
</select>
|
</select>
|
||||||
|
<div class="streaming-toggle">
|
||||||
|
<label>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
prop:checked=move || use_streaming.get()
|
||||||
|
on:change=move |ev| {
|
||||||
|
let target = event_target::<web_sys::HtmlInputElement>(&ev);
|
||||||
|
use_streaming.set(target.checked());
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
" Use streaming"
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -348,7 +551,24 @@ fn ChatPage() -> impl IntoView {
|
|||||||
/>
|
/>
|
||||||
|
|
||||||
{move || {
|
{move || {
|
||||||
if is_loading.get() {
|
if is_streaming.get() {
|
||||||
|
let content = streaming_content.get();
|
||||||
|
if !content.is_empty() {
|
||||||
|
view! {
|
||||||
|
<div class="message assistant-message streaming">
|
||||||
|
<div class="message-role">"assistant"</div>
|
||||||
|
<div class="message-content">{content}<span class="cursor">"▊"</span></div>
|
||||||
|
</div>
|
||||||
|
}.into_any()
|
||||||
|
} else {
|
||||||
|
view! {
|
||||||
|
<div class="message assistant-message loading">
|
||||||
|
<div class="message-role">"assistant"</div>
|
||||||
|
<div class="message-content">"Thinking..."</div>
|
||||||
|
</div>
|
||||||
|
}.into_any()
|
||||||
|
}
|
||||||
|
} else if is_loading.get() && !use_streaming.get() {
|
||||||
view! {
|
view! {
|
||||||
<div class="message assistant-message loading">
|
<div class="message assistant-message loading">
|
||||||
<div class="message-role">"assistant"</div>
|
<div class="message-role">"assistant"</div>
|
||||||
|
@@ -42,6 +42,7 @@ body {
|
|||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
gap: 0.5rem;
|
gap: 0.5rem;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
|
||||||
label {
|
label {
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
@@ -69,6 +70,24 @@ body {
|
|||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.streaming-toggle {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
margin-left: 1rem;
|
||||||
|
|
||||||
|
label {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
|
||||||
|
input[type="checkbox"] {
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -134,6 +153,16 @@ body {
|
|||||||
color: #6b7280;
|
color: #6b7280;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
&.streaming {
|
||||||
|
.message-content {
|
||||||
|
.cursor {
|
||||||
|
display: inline-block;
|
||||||
|
animation: blink 1s infinite;
|
||||||
|
color: #9ca3af;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -223,4 +252,14 @@ body {
|
|||||||
|
|
||||||
.chat-messages::-webkit-scrollbar-thumb:hover {
|
.chat-messages::-webkit-scrollbar-thumb:hover {
|
||||||
background: #a8a8a8;
|
background: #a8a8a8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Cursor blink animation */
|
||||||
|
@keyframes blink {
|
||||||
|
0%, 50% {
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
51%, 100% {
|
||||||
|
opacity: 0;
|
||||||
|
}
|
||||||
}
|
}
|
@@ -15,7 +15,7 @@ CONNECT_TIMEOUT=${CONNECT_TIMEOUT:-10}
|
|||||||
MAX_TIME=${MAX_TIME:-30}
|
MAX_TIME=${MAX_TIME:-30}
|
||||||
|
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
[info] POST $SERVER_URL/v1/chat/completions/stream (SSE)
|
[info] POST $SERVER_URL/v1/chat/completions (SSE)
|
||||||
[info] model=$MODEL_ID, max_tokens=$MAX_TOKENS
|
[info] model=$MODEL_ID, max_tokens=$MAX_TOKENS
|
||||||
[info] prompt=$PROMPT
|
[info] prompt=$PROMPT
|
||||||
[info] timeouts: connect=${CONNECT_TIMEOUT}s, max=${MAX_TIME}s
|
[info] timeouts: connect=${CONNECT_TIMEOUT}s, max=${MAX_TIME}s
|
||||||
|
Reference in New Issue
Block a user