Introduce predict-otron-9000: a unified server combining an embeddings engine and an inference engine. Includes OpenAI-compatible APIs, full documentation, and example scripts.

geoffsee
2025-08-16 19:11:35 -04:00
commit 2aa6d4cdf8
28 changed files with 16595 additions and 0 deletions


@@ -0,0 +1,67 @@
use inference_engine::model::{Model, Which};

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_which_to_model_id() {
        // Test a few representative model variants
        assert_eq!(Which::Base2B.to_model_id(), "google/gemma-2b");
        assert_eq!(Which::Instruct7B.to_model_id(), "google/gemma-7b-it");
        assert_eq!(Which::InstructV1_1_2B.to_model_id(), "google/gemma-1.1-2b-it");
        assert_eq!(Which::CodeBase2B.to_model_id(), "google/codegemma-2b");
        assert_eq!(Which::BaseV2_2B.to_model_id(), "google/gemma-2-2b");
        assert_eq!(Which::InstructV3_1B.to_model_id(), "google/gemma-3-1b-it");
    }

    #[test]
    fn test_which_is_instruct_model() {
        // Test base models (should return false)
        assert!(!Which::Base2B.is_instruct_model());
        assert!(!Which::Base7B.is_instruct_model());
        assert!(!Which::CodeBase2B.is_instruct_model());
        assert!(!Which::CodeBase7B.is_instruct_model());
        assert!(!Which::BaseV2_2B.is_instruct_model());
        assert!(!Which::BaseV2_9B.is_instruct_model());
        assert!(!Which::BaseV3_1B.is_instruct_model());

        // Test instruct models (should return true)
        assert!(Which::Instruct2B.is_instruct_model());
        assert!(Which::Instruct7B.is_instruct_model());
        assert!(Which::InstructV1_1_2B.is_instruct_model());
        assert!(Which::InstructV1_1_7B.is_instruct_model());
        assert!(Which::CodeInstruct2B.is_instruct_model());
        assert!(Which::CodeInstruct7B.is_instruct_model());
        assert!(Which::InstructV2_2B.is_instruct_model());
        assert!(Which::InstructV2_9B.is_instruct_model());
        assert!(Which::InstructV3_1B.is_instruct_model());
    }

    #[test]
    fn test_which_is_v3_model() {
        // Test non-v3 models (should return false)
        assert!(!Which::Base2B.is_v3_model());
        assert!(!Which::Base7B.is_v3_model());
        assert!(!Which::Instruct2B.is_v3_model());
        assert!(!Which::Instruct7B.is_v3_model());
        assert!(!Which::InstructV1_1_2B.is_v3_model());
        assert!(!Which::InstructV1_1_7B.is_v3_model());
        assert!(!Which::CodeBase2B.is_v3_model());
        assert!(!Which::CodeBase7B.is_v3_model());
        assert!(!Which::CodeInstruct2B.is_v3_model());
        assert!(!Which::CodeInstruct7B.is_v3_model());
        assert!(!Which::BaseV2_2B.is_v3_model());
        assert!(!Which::InstructV2_2B.is_v3_model());
        assert!(!Which::BaseV2_9B.is_v3_model());
        assert!(!Which::InstructV2_9B.is_v3_model());

        // Test v3 models (should return true)
        assert!(Which::BaseV3_1B.is_v3_model());
        assert!(Which::InstructV3_1B.is_v3_model());
    }
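
    // Added for illustration: a compact consistency check (a sketch, not part of the
    // original test plan). The instruct model ids asserted above all end with the
    // "-it" suffix used by the Gemma instruct checkpoints, so that convention can be
    // spot-checked directly. Only variants whose ids appear in the assertions above
    // are included, to avoid assuming ids this file does not state.
    #[test]
    fn test_instruct_ids_use_it_suffix() {
        for which in [Which::Instruct7B, Which::InstructV1_1_2B, Which::InstructV3_1B] {
            assert!(which.to_model_id().ends_with("-it"));
        }
    }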

    // Note: Testing the Model enum's forward method would require creating actual
    // model instances, which is complex and would require loading model weights.
    // This is better suited for integration tests or mocking the models.
}


@@ -0,0 +1,101 @@
use anyhow::Result;
use candle_transformers::generation::LogitsProcessor;
use inference_engine::model::Which;
use inference_engine::token_output_stream::TokenOutputStream;
use tokenizers::Tokenizer;

#[cfg(test)]
mod tests {
    use super::*;

    // Helper function to create a simple tokenizer for testing
    fn create_test_tokenizer() -> Result<Tokenizer> {
        // Create a simple tokenizer from the pretrained model
        // This uses the tokenizer from the Hugging Face hub
        let tokenizer = Tokenizer::from_pretrained("google/gemma-2b", None).unwrap();
        Ok(tokenizer)
    }
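
    // Added for illustration: an offline alternative (a sketch, not used by the tests
    // below). If a serialized tokenizer were checked into the repository, it could be
    // loaded without any network access; the `tests/fixtures/tokenizer.json` path is
    // hypothetical and would need to exist for this to succeed.
    #[allow(dead_code)]
    fn create_local_tokenizer() -> Result<Tokenizer> {
        // Tokenizer::from_file reads a tokenizer.json previously saved to disk.
        Tokenizer::from_file("tests/fixtures/tokenizer.json").map_err(anyhow::Error::msg)
    }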

    // Test the Which enum's to_model_id method
    #[test]
    fn test_which_model_id() {
        assert_eq!(Which::Base2B.to_model_id(), "google/gemma-2b");
        assert_eq!(Which::Instruct7B.to_model_id(), "google/gemma-7b-it");
    }

    // Test the Which enum's is_instruct_model method
    #[test]
    fn test_which_is_instruct() {
        assert!(!Which::Base2B.is_instruct_model());
        assert!(Which::Instruct7B.is_instruct_model());
    }

    // Test the Which enum's is_v3_model method
    #[test]
    fn test_which_is_v3() {
        assert!(!Which::Base2B.is_v3_model());
        assert!(Which::BaseV3_1B.is_v3_model());
    }

    // Test the TokenOutputStream functionality
    #[test]
    fn test_token_output_stream() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);

        // Test encoding and decoding
        let text = "Hello, world!";
        let encoded = token_stream.tokenizer().encode(text, true).unwrap();
        let token_ids = encoded.get_ids();

        // Add tokens one by one
        for &token_id in token_ids {
            token_stream.next_token(token_id)?;
        }

        // Decode all and check
        let decoded = token_stream.decode_all()?;
        assert_eq!(decoded.trim(), text);
        Ok(())
    }

    // Test the LogitsProcessor
    #[test]
    fn test_logits_processor() -> Result<()> {
        // Create a LogitsProcessor with typical sampling settings
        let seed = 42;
        let temp = Some(0.8);
        let top_p = Some(0.9);
        let _logits_processor = LogitsProcessor::new(seed, temp, top_p);

        // In a real test, we would build a logits tensor with known values and
        // verify that sampling produces the expected token ids. For now, this
        // only checks that the LogitsProcessor can be constructed.
        Ok(())
    }
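
    // Added for illustration: a sketch of how sampling could be verified with known
    // logits. It assumes the `candle-core` crate is accessible to this test target
    // (the engine already uses candle-transformers, which builds on candle-core).
    // With no temperature, LogitsProcessor falls back to argmax, so the sampled
    // index is deterministic.
    #[test]
    fn test_logits_processor_argmax() -> Result<()> {
        use candle_core::{Device, Tensor};

        let mut greedy = LogitsProcessor::new(42, None, None);
        // Index 2 holds the largest logit, so argmax sampling must return 2.
        let logits = Tensor::new(&[0.1f32, 0.2, 3.0, 0.4], &Device::Cpu)?;
        assert_eq!(greedy.sample(&logits)?, 2);
        Ok(())
    }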

    // Test the TextGeneration constructor
    #[test]
    fn test_text_generation_constructor() -> Result<()> {
        // We can't easily create a Model instance for testing,
        // but we can test that the constructor compiles and the types are correct.
        //
        // In a real test with a mock Model, we would:
        // 1. Create a mock model
        // 2. Create a tokenizer
        // 3. Call TextGeneration::new
        // 4. Verify the properties of the created instance
        //
        // For now, we'll just verify that the code compiles
        assert!(true);
        Ok(())
    }

    // Note: Testing the actual text generation functionality would require
    // integration tests with real models, which is beyond the scope of these unit tests.
    // The tests above focus on the components that can be tested in isolation.
}


@@ -0,0 +1,129 @@
use anyhow::Result;
use inference_engine::token_output_stream::TokenOutputStream;
use tokenizers::Tokenizer;

#[cfg(test)]
mod tests {
    use super::*;

    // Helper function to create a simple tokenizer for testing
    fn create_test_tokenizer() -> Result<Tokenizer> {
        // Create a simple tokenizer from the pretrained model
        // This uses the tokenizer from the Hugging Face hub
        let tokenizer = Tokenizer::from_pretrained("google/gemma-2b", None).unwrap();
        Ok(tokenizer)
    }

    #[test]
    fn test_new_token_output_stream() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let token_stream = TokenOutputStream::new(tokenizer);

        // Check that the token stream was created successfully
        assert!(!token_stream.tokenizer().get_vocab(true).is_empty());
        Ok(())
    }

    #[test]
    fn test_clear() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);

        // Add a token
        let token_id = token_stream.get_token("<eos>").unwrap();
        token_stream.next_token(token_id)?;

        // Clear the stream
        token_stream.clear();

        // Check that the stream is empty by trying to decode all
        let decoded = token_stream.decode_all()?;
        assert_eq!(decoded, "");
        Ok(())
    }

    #[test]
    fn test_get_token() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let token_stream = TokenOutputStream::new(tokenizer);

        // Get a token that should exist
        let eos_token = token_stream.get_token("<eos>");
        assert!(eos_token.is_some());

        // Get a token that shouldn't exist
        let nonexistent_token = token_stream.get_token("<this_token_does_not_exist>");
        assert!(nonexistent_token.is_none());
        Ok(())
    }

    #[test]
    fn test_next_token_and_decode() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);

        // Get some tokens
        let hello_tokens = token_stream.tokenizer().encode("Hello world", true).unwrap();
        let token_ids = hello_tokens.get_ids();

        // Add tokens one by one
        let mut output = String::new();
        for &token_id in token_ids {
            if let Some(text) = token_stream.next_token(token_id)? {
                output.push_str(&text);
            }
        }

        // Get any remaining text
        if let Some(rest) = token_stream.decode_rest()? {
            output.push_str(&rest);
        }

        // Check the output
        assert!(!output.is_empty());
        assert_eq!(output.trim(), "Hello world");
        Ok(())
    }

    #[test]
    fn test_decode_all() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);

        // Get some tokens
        let hello_tokens = token_stream.tokenizer().encode("Hello world", true).unwrap();
        let token_ids = hello_tokens.get_ids();

        // Add tokens one by one
        for &token_id in token_ids {
            token_stream.next_token(token_id)?;
        }

        // Decode all
        let decoded = token_stream.decode_all()?;

        // Check the output
        assert_eq!(decoded.trim(), "Hello world");
        Ok(())
    }

    #[test]
    fn test_into_inner() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let token_stream = TokenOutputStream::new(tokenizer);

        // Get the inner tokenizer
        let inner_tokenizer = token_stream.into_inner();

        // Check that the inner tokenizer works
        let encoded = inner_tokenizer.encode("Test", true).unwrap();
        assert!(!encoded.get_ids().is_empty());
        Ok(())
    }
}