update docs
Cargo.toml
@@ -3,13 +3,12 @@ members = [
     "crates/predict-otron-9000",
     "crates/inference-engine",
     "crates/embeddings-engine",
-    "crates/leptos-app"
+    "crates/leptos-app",
+    "crates/helm-chart-tool"
 ]

 default-members = ["crates/predict-otron-9000"]

 resolver = "2"

 [[workspace.metadata.leptos]]
 # project name
 bin-package = "leptos-app"
README.md (19 lines changed)
@@ -264,23 +264,6 @@ export RUST_LOG=trace
export RUST_LOG=predict_otron_9000=debug,embeddings_engine=trace
```

## Chat Interface

The project includes a WebAssembly-based chat interface built with the Leptos framework.

### Building the Chat Interface

```shell
# Navigate to the leptos-app crate
cd crates/leptos-app

# Build the WebAssembly package
cargo build --target wasm32-unknown-unknown

# For development with trunk (if installed)
trunk serve
```

### Usage

The chat interface connects to the inference engine API and provides a user-friendly way to interact with the AI models. To use:
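A minimal sketch of the request the chat interface ultimately sends, assuming the OpenAI-style `/v1/chat/completions` route, port 3777, and the `gemma-3-1b-it` model name that the test files in this repository use:

```javascript
// Hedged sketch: endpoint, port, and model name follow this repo's test
// files, but treat all three as assumptions for your deployment.
(async function chatOnce() {
  const response = await fetch('http://localhost:3777/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'gemma-3-1b-it',
      messages: [{ role: 'user', content: 'Hello, how are you today?' }],
      max_tokens: 150
    })
  });
  const data = await response.json();
  console.log(data.choices?.[0]?.message?.content);
})();
```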
@@ -298,8 +281,6 @@ The interface supports:

- **Inference Engine**: Currently provides a simplified implementation for chat completions. Full model loading and text generation capabilities from the inference-engine crate are not yet integrated into the unified server.
- **Model Support**: Embeddings are limited to the Nomic Embed Text v1.5 model.
- **Scalability**: Single-threaded model loading may impact performance under heavy load.
- **Chat Interface**: The WebAssembly chat interface requires compilation to a static site before deployment.

## Contributing
crates/embeddings-engine/README.md (new file, 4 lines)
@@ -0,0 +1,4 @@
# Embeddings Engine

A high-performance text embeddings service that generates vector representations of text using state-of-the-art models.
This crate wraps the fastembed crate to provide embeddings and partially adapts the OpenAI specification.
@@ -1,295 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenAI-Compatible API Tester</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            line-height: 1.6;
        }
        h1, h2 {
            color: #333;
        }
        .container {
            margin-bottom: 20px;
        }
        textarea {
            width: 100%;
            height: 150px;
            padding: 10px;
            margin-bottom: 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
            font-family: monospace;
        }
        button {
            background-color: #4CAF50;
            color: white;
            padding: 10px 15px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
        }
        button:hover {
            background-color: #45a049;
        }
        pre {
            background-color: #f5f5f5;
            padding: 15px;
            border-radius: 4px;
            overflow-x: auto;
            white-space: pre-wrap;
        }
        .response {
            margin-top: 20px;
        }
        .error {
            color: red;
        }
        .settings {
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
            margin-bottom: 15px;
        }
        .settings div {
            display: flex;
            flex-direction: column;
        }
        label {
            margin-bottom: 5px;
            font-weight: bold;
        }
        input {
            padding: 8px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }
        .examples {
            margin-top: 30px;
        }
        .example-btn {
            background-color: #2196F3;
            margin-right: 10px;
            margin-bottom: 10px;
        }
        .example-btn:hover {
            background-color: #0b7dda;
        }
    </style>
</head>
<body>
    <h1>OpenAI-Compatible API Tester</h1>
    <p>Use this page to test the OpenAI-compatible chat completions endpoint of the local inference engine.</p>

    <div class="container">
        <h2>Request Settings</h2>
        <div class="settings">
            <div>
                <label for="serverUrl">Server URL:</label>
                <input type="text" id="serverUrl" value="http://localhost:3777" />
            </div>
            <div>
                <label for="model">Model:</label>
                <input type="text" id="model" value="gemma-3-1b-it" />
            </div>
            <div>
                <label for="maxTokens">Max Tokens:</label>
                <input type="number" id="maxTokens" value="150" />
            </div>
            <div>
                <label for="temperature">Temperature:</label>
                <input type="number" id="temperature" value="0.7" step="0.1" min="0" max="2" />
            </div>
            <div>
                <label for="topP">Top P:</label>
                <input type="number" id="topP" value="0.9" step="0.1" min="0" max="1" />
            </div>
        </div>

        <h2>Request Body</h2>
        <textarea id="requestBody">{
  "model": "gemma-3-1b-it",
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you today?"
    }
  ],
  "max_tokens": 150,
  "temperature": 0.7,
  "top_p": 0.9
}</textarea>
        <button id="sendRequest">Send Request</button>

        <div class="examples">
            <h3>Example Requests</h3>
            <button class="example-btn" id="example1">Basic Question</button>
            <button class="example-btn" id="example2">Multi-turn Conversation</button>
            <button class="example-btn" id="example3">Creative Writing</button>
            <button class="example-btn" id="example4">Code Generation</button>
        </div>

        <div class="response">
            <h2>Response</h2>
            <pre id="responseOutput">Response will appear here...</pre>
        </div>
    </div>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // Update request body when settings change
            const serverUrlInput = document.getElementById('serverUrl');
            const modelInput = document.getElementById('model');
            const maxTokensInput = document.getElementById('maxTokens');
            const temperatureInput = document.getElementById('temperature');
            const topPInput = document.getElementById('topP');
            const requestBodyTextarea = document.getElementById('requestBody');
            const responseOutput = document.getElementById('responseOutput');

            // Function to update request body from settings
            function updateRequestBodyFromSettings() {
                try {
                    const requestBody = JSON.parse(requestBodyTextarea.value);
                    requestBody.model = modelInput.value;
                    requestBody.max_tokens = parseInt(maxTokensInput.value);
                    requestBody.temperature = parseFloat(temperatureInput.value);
                    requestBody.top_p = parseFloat(topPInput.value);
                    requestBodyTextarea.value = JSON.stringify(requestBody, null, 2);
                } catch (error) {
                    console.error("Error updating request body:", error);
                }
            }

            // Update settings when request body changes
            function updateSettingsFromRequestBody() {
                try {
                    const requestBody = JSON.parse(requestBodyTextarea.value);
                    if (requestBody.model) modelInput.value = requestBody.model;
                    if (requestBody.max_tokens) maxTokensInput.value = requestBody.max_tokens;
                    if (requestBody.temperature) temperatureInput.value = requestBody.temperature;
                    if (requestBody.top_p) topPInput.value = requestBody.top_p;
                } catch (error) {
                    console.error("Error updating settings:", error);
                }
            }

            // Add event listeners for settings changes
            modelInput.addEventListener('change', updateRequestBodyFromSettings);
            maxTokensInput.addEventListener('change', updateRequestBodyFromSettings);
            temperatureInput.addEventListener('change', updateRequestBodyFromSettings);
            topPInput.addEventListener('change', updateRequestBodyFromSettings);

            // Add event listener for request body changes
            requestBodyTextarea.addEventListener('blur', updateSettingsFromRequestBody);

            // Send request button
            document.getElementById('sendRequest').addEventListener('click', async function() {
                try {
                    responseOutput.textContent = "Sending request...";
                    const serverUrl = serverUrlInput.value;
                    const endpoint = '/v1/chat/completions';
                    const url = serverUrl + endpoint;

                    const requestBody = JSON.parse(requestBodyTextarea.value);

                    const response = await fetch(url, {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json',
                        },
                        body: JSON.stringify(requestBody)
                    });

                    const data = await response.json();
                    responseOutput.textContent = JSON.stringify(data, null, 2);
                } catch (error) {
                    responseOutput.textContent = "Error: " + error.message;
                    responseOutput.classList.add('error');
                }
            });

            // Example requests
            document.getElementById('example1').addEventListener('click', function() {
                requestBodyTextarea.value = JSON.stringify({
                    model: modelInput.value,
                    messages: [
                        {
                            role: "user",
                            content: "Who was the 16th president of the United States?"
                        }
                    ],
                    max_tokens: parseInt(maxTokensInput.value),
                    temperature: parseFloat(temperatureInput.value),
                    top_p: parseFloat(topPInput.value)
                }, null, 2);
            });

            document.getElementById('example2').addEventListener('click', function() {
                requestBodyTextarea.value = JSON.stringify({
                    model: modelInput.value,
                    messages: [
                        {
                            role: "system",
                            content: "You are a helpful assistant that provides concise answers."
                        },
                        {
                            role: "user",
                            content: "What is machine learning?"
                        },
                        {
                            role: "assistant",
                            content: "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed."
                        },
                        {
                            role: "user",
                            content: "Give me an example of a machine learning algorithm."
                        }
                    ],
                    max_tokens: parseInt(maxTokensInput.value),
                    temperature: parseFloat(temperatureInput.value),
                    top_p: parseFloat(topPInput.value)
                }, null, 2);
            });

            document.getElementById('example3').addEventListener('click', function() {
                requestBodyTextarea.value = JSON.stringify({
                    model: modelInput.value,
                    messages: [
                        {
                            role: "user",
                            content: "Write a short poem about artificial intelligence."
                        }
                    ],
                    max_tokens: parseInt(maxTokensInput.value),
                    temperature: 0.9, // Higher temperature for creative tasks
                    top_p: 0.9
                }, null, 2);
                temperatureInput.value = 0.9;
            });

            document.getElementById('example4').addEventListener('click', function() {
                requestBodyTextarea.value = JSON.stringify({
                    model: modelInput.value,
                    messages: [
                        {
                            role: "user",
                            content: "Write a Python function to calculate the Fibonacci sequence up to n terms."
                        }
                    ],
                    max_tokens: parseInt(maxTokensInput.value),
                    temperature: 0.3, // Lower temperature for code generation
                    top_p: 0.9
                }, null, 2);
                temperatureInput.value = 0.3;
            });
        });
    </script>
</body>
</html>
@@ -1,176 +0,0 @@
// Test requests for the OpenAI-compatible endpoint in the inference server
// This file contains IIFE (Immediately Invoked Function Expression) JavaScript requests
// to test the /v1/chat/completions endpoint

// Basic chat completion request
(async function testBasicChatCompletion() {
    console.log("Test 1: Basic chat completion request");
    try {
        const response = await fetch('http://localhost:3777/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: "gemma-2-2b-it",
                messages: [
                    {
                        role: "user",
                        content: "Who was the 16th president of the United States?"
                    }
                ],
                max_tokens: 100
            })
        });

        const data = await response.json();
        console.log("Response:", JSON.stringify(data, null, 2));
    } catch (error) {
        console.error("Error:", error);
    }
})();

// Multi-turn conversation
(async function testMultiTurnConversation() {
    console.log("\nTest 2: Multi-turn conversation");
    try {
        const response = await fetch('http://localhost:3777/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: "gemma-2-2b-it",
                messages: [
                    {
                        role: "system",
                        content: "You are a helpful assistant that provides concise answers."
                    },
                    {
                        role: "user",
                        content: "What is machine learning?"
                    },
                    {
                        role: "assistant",
                        content: "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed."
                    },
                    {
                        role: "user",
                        content: "Give me an example of a machine learning algorithm."
                    }
                ],
                max_tokens: 150
            })
        });

        const data = await response.json();
        console.log("Response:", JSON.stringify(data, null, 2));
    } catch (error) {
        console.error("Error:", error);
    }
})();

// Request with temperature and top_p parameters
(async function testTemperatureAndTopP() {
    console.log("\nTest 3: Request with temperature and top_p parameters");
    try {
        const response = await fetch('http://localhost:3777/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: "gemma-2-2b-it",
                messages: [
                    {
                        role: "user",
                        content: "Write a short poem about artificial intelligence."
                    }
                ],
                max_tokens: 200,
                temperature: 0.8,
                top_p: 0.9
            })
        });

        const data = await response.json();
        console.log("Response:", JSON.stringify(data, null, 2));
    } catch (error) {
        console.error("Error:", error);
    }
})();

// Request with streaming enabled
(async function testStreaming() {
    console.log("\nTest 4: Request with streaming enabled");
    try {
        const response = await fetch('http://localhost:3777/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: "gemma-2-2b-it",
                messages: [
                    {
                        role: "user",
                        content: "Explain quantum computing in simple terms."
                    }
                ],
                max_tokens: 150,
                stream: true
            })
        });

        // Note: Streaming might not be implemented yet; this tests the API's handling of the parameter
        if (response.headers.get('content-type')?.includes('text/event-stream')) {
            console.log("Streaming response detected. Reading stream...");
            const reader = response.body.getReader();
            const decoder = new TextDecoder();

            while (true) {
                const { done, value } = await reader.read();
                if (done) break;

                const chunk = decoder.decode(value);
                console.log("Chunk:", chunk);
            }
        } else {
            const data = await response.json();
            console.log("Non-streaming response:", JSON.stringify(data, null, 2));
        }
    } catch (error) {
        console.error("Error:", error);
    }
})();

// Request with a different model
(async function testDifferentModel() {
    console.log("\nTest 5: Request with a different model");
    try {
        const response = await fetch('http://localhost:3777/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: "gemma-2-2b-it", // Using a different model if available
                messages: [
                    {
                        role: "user",
                        content: "What are the benefits of renewable energy?"
                    }
                ],
                max_tokens: 150
            })
        });

        const data = await response.json();
        console.log("Response:", JSON.stringify(data, null, 2));
    } catch (error) {
        console.error("Error:", error);
    }
})();

console.log("\nAll test requests have been sent. Check the server logs for more details.");
console.log("To run the server, use: cargo run --bin inference-engine -- --server");
crates/predict-otron-9000/README.md (new file, 8 lines)
@@ -0,0 +1,8 @@
# predict-otron-9000

This is an extensible axum/tokio hybrid combining [embeddings-engine](../embeddings-engine), [inference-engine](../inference-engine), and [leptos-app](../leptos-app).

# Notes
- When `server_mode` is Standalone (default), the instance contains all components necessary for inference.
- When `server_mode` is HighAvailability, the server proxies to dedicated inference and embeddings services via DNS, allowing each to scale automatically.
integration/README.md (new file, 1 line)
@@ -0,0 +1 @@
This package enables testing of a live instance of predict-otron-9000 using the OpenAI Node SDK directly.
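A minimal sketch of such a test, assuming the server exposes an OpenAI-compatible `/v1` route on port 3777 and does not validate the API key (both assumptions):

```javascript
import OpenAI from 'openai';

// Point the SDK at a local predict-otron-9000 instance. The baseURL and
// placeholder apiKey are assumptions; the server may ignore the key.
const client = new OpenAI({
  baseURL: 'http://localhost:3777/v1',
  apiKey: 'not-needed',
});

const completion = await client.chat.completions.create({
  model: 'gemma-3-1b-it',
  messages: [{ role: 'user', content: 'Hello, how are you today?' }],
  max_tokens: 150,
});

console.log(completion.choices[0].message.content);
```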
@@ -1,5 +1,8 @@
{
  "name": "@predict-otron-9000/integration",
  "scripts": {
    "test": "bun test"
  },
  "dependencies": {
    "openai": "^5.16.0"
  }
}
@@ -1,5 +1,8 @@
{
  "dependencies": {
    "openai": "^5.16.0"
  },
  "scripts": {
    "cli": "./scripts/cli.ts"
  }
}
@@ -1,3 +0,0 @@
#!/bin/bash

cargo run --bin ptron
@@ -1,69 +0,0 @@
#!/bin/bash

# Simple test script for inference-engine
# This script sends a single chat completion request

echo "===== Inference Engine Test ====="

# Test parameters
SERVER_URL="http://localhost:3777" # Matches the main.rs default port
MAX_TOKENS=10
PROMPT="What is the capital of France?"
MODEL="${MODEL_ID:-gemma-2-2b-it}" # Using gemma-2-2b-it as specified in the original test

# Create a temp directory for test results
TEMP_DIR=$(mktemp -d)
echo "Storing test results in: $TEMP_DIR"

# Prepare JSON payload
json_payload=$(cat <<EOF
{
    "model": "$MODEL",
    "messages": [{"role": "user", "content": "$PROMPT"}],
    "max_tokens": $MAX_TOKENS
}
EOF
)

# Make sure the server is running
echo "Checking if the server is running..."
if ! curl -s "$SERVER_URL" > /dev/null; then
    echo "Server doesn't appear to be running at $SERVER_URL"
    echo "Please start the server with: ./run_server.sh"
    exit 1
fi

echo "Sending request..."

# Send request and measure time
start_time=$(date +%s.%N)

# Send the chat completion request with a 30-second timeout.
# Note: The gemma-2-2b-it model takes ~12.57 seconds per token on average,
# so even with MAX_TOKENS=10 the request might time out before completion.
# The timeout ensures the script doesn't hang indefinitely.
response=$(curl -s -X POST \
    -H "Content-Type: application/json" \
    -d "$json_payload" \
    --max-time 30 \
    "$SERVER_URL/v1/chat/completions")

end_time=$(date +%s.%N)

# Calculate elapsed time
elapsed=$(echo "$end_time - $start_time" | bc)

# Extract response content length
content_length=$(echo "$response" | grep -o '"content":"[^"]*"' | wc -c)

# Check if we got an error
error_check=$(echo "$response" | grep -c "error")
if [ "$error_check" -gt 0 ]; then
    echo "Error in response: $response"
fi

# Log results
echo "Time: ${elapsed}s, Response size: $content_length bytes"
echo "Response: $response"

echo -e "\nTest Complete"
@@ -1,85 +0,0 @@
#!/usr/bin/env node

// Test script to reproduce token repetition issue with special characters
// Uses the built-in global fetch (Node 18+), whose response body supports getReader()

async function testTokenRepetition() {
    console.log("Testing token repetition with special characters...");

    try {
        const response = await fetch('http://localhost:8080/chat/stream', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                message: "Write a simple greeting with punctuation marks like: Hello! How are you? I'm fine, thanks."
            })
        });

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const reader = response.body?.getReader();
        if (!reader) {
            throw new Error('No reader available');
        }

        let fullResponse = '';
        let tokens = [];

        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            const chunk = new TextDecoder().decode(value);
            const lines = chunk.split('\n');

            for (const line of lines) {
                if (line.startsWith('data: ')) {
                    const data = line.slice(6);
                    if (data === '[DONE]') {
                        continue;
                    }

                    try {
                        const parsed = JSON.parse(data);
                        if (parsed.token) {
                            tokens.push(parsed.token);
                            fullResponse += parsed.token;
                            console.log(`Token: "${parsed.token}"`);
                        }
                    } catch (e) {
                        console.log(`Non-JSON data: ${data}`);
                    }
                }
            }
        }

        console.log('\n=== ANALYSIS ===');
        console.log('Full response:', fullResponse);
        console.log('Total tokens:', tokens.length);

        // Check for repetition issues
        const tokenCounts = {};
        let hasRepetition = false;

        for (const token of tokens) {
            tokenCounts[token] = (tokenCounts[token] || 0) + 1;
            if (tokenCounts[token] > 1 && token.match(/[!?,.;:]/)) {
                console.log(`⚠️ Repetition detected: "${token}" appears ${tokenCounts[token]} times`);
                hasRepetition = true;
            }
        }

        if (!hasRepetition) {
            console.log('✅ No token repetition detected');
        }

    } catch (error) {
        console.error('Error testing token repetition:', error);
    }
}

testTokenRepetition();