cleanup, add ci

2025-09-08 22:46:44 +00:00 · 2025-08-31 10:31:07 -04:00
parent 419e1c2ea7
commit f5d2a85f2e
42 changed files with 1740 additions and 705 deletions
--- a/crates/inference-engine/tests/model_tests.rs
+++ b/crates/inference-engine/tests/model_tests.rs
@@ -9,7 +9,10 @@ mod tests {
        // Test a few representative model variants
        assert_eq!(Which::Base2B.to_model_id(), "google/gemma-2b");
        assert_eq!(Which::Instruct7B.to_model_id(), "google/gemma-7b-it");
-        assert_eq!(Which::InstructV1_1_2B.to_model_id(), "google/gemma-1.1-2b-it");
+        assert_eq!(
+            Which::InstructV1_1_2B.to_model_id(),
+            "google/gemma-1.1-2b-it"
+        );
        assert_eq!(Which::CodeBase2B.to_model_id(), "google/codegemma-2b");
        assert_eq!(Which::BaseV2_2B.to_model_id(), "google/gemma-2-2b");
        assert_eq!(Which::InstructV3_1B.to_model_id(), "google/gemma-3-1b-it");
@@ -64,4 +67,4 @@ mod tests {
    // Note: Testing the Model enum's forward method would require creating actual model instances,
    // which is complex and would require loading model weights. This is better suited for
    // integration tests or mocking the models.
-}
+}
--- a/crates/inference-engine/tests/text_generation_tests.rs
+++ b/crates/inference-engine/tests/text_generation_tests.rs
@@ -106,7 +106,7 @@ mod tests {
        let logits_data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens = vec![1u32, 2u32, 3u32];
-        
+
        // Create a mock TextGeneration instance
        // Since we can't easily create a full TextGeneration instance without a model,
        // we'll test the logic by creating a simple struct with the necessary fields
@@ -115,7 +115,7 @@ mod tests {
            repeat_last_n: usize,
            penalty_cache: HashMap<usize, f32>,
        }
-        
+
        impl MockTextGeneration {
            fn apply_cached_repeat_penalty(
                &mut self,
@@ -167,16 +167,17 @@ mod tests {
                Ok((result, elapsed))
            }
        }
-        
+
        let mut mock_gen = MockTextGeneration {
            repeat_penalty: 1.0, // No penalty
            repeat_last_n: 3,
            penalty_cache: HashMap::new(),
        };
-        
-        let (result_logits, _duration) = mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
+
+        let (result_logits, _duration) =
+            mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
        let result_data = result_logits.to_vec1::<f32>()?;
-        
+
        // With no penalty, logits should be unchanged
        assert_eq!(result_data, logits_data);
        Ok(())
@@ -189,13 +190,13 @@ mod tests {
        let logits_data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens = vec![1u32, 2u32, 3u32];
-        
+
        struct MockTextGeneration {
            repeat_penalty: f32,
            repeat_last_n: usize,
            penalty_cache: HashMap<usize, f32>,
        }
-        
+
        impl MockTextGeneration {
            fn apply_cached_repeat_penalty(
                &mut self,
@@ -238,16 +239,17 @@ mod tests {
                Ok((result, elapsed))
            }
        }
-        
+
        let mut mock_gen = MockTextGeneration {
            repeat_penalty: 2.0, // Apply penalty
            repeat_last_n: 3,
            penalty_cache: HashMap::new(),
        };
-        
-        let (result_logits, _duration) = mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
+
+        let (result_logits, _duration) =
+            mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
        let result_data = result_logits.to_vec1::<f32>()?;
-        
+
        // Tokens 1, 2, 3 should be penalized (divided by 2.0)
        let expected = vec![1.0f32, 1.0, 1.5, 2.0, 5.0]; // [1.0, 2.0/2.0, 3.0/2.0, 4.0/2.0, 5.0]
        assert_eq!(result_data, expected);
@@ -261,13 +263,13 @@ mod tests {
        let logits_data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens = vec![1u32, 1u32, 1u32]; // Repeated token should use cache
-        
+
        struct MockTextGeneration {
            repeat_penalty: f32,
            repeat_last_n: usize,
            penalty_cache: HashMap<usize, f32>,
        }
-        
+
        impl MockTextGeneration {
            fn apply_cached_repeat_penalty(
                &mut self,
@@ -308,20 +310,21 @@ mod tests {
                Ok((result, elapsed))
            }
        }
-        
+
        let mut mock_gen = MockTextGeneration {
            repeat_penalty: 2.0,
            repeat_last_n: 3,
            penalty_cache: HashMap::new(),
        };
-        
+
        // First call should cache the penalty for token 1
-        let (_result_logits, _duration) = mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
-        
+        let (_result_logits, _duration) =
+            mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
+
        // Cache should contain the penalized value for token 1
        assert!(mock_gen.penalty_cache.contains_key(&1));
        assert_eq!(mock_gen.penalty_cache.get(&1), Some(&1.0)); // 2.0 / 2.0 = 1.0
-        
+
        Ok(())
    }

@@ -332,13 +335,13 @@ mod tests {
        let logits_data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens: Vec<u32> = vec![]; // Empty tokens
-        
+
        struct MockTextGeneration {
            repeat_penalty: f32,
            repeat_last_n: usize,
            penalty_cache: HashMap<usize, f32>,
        }
-        
+
        impl MockTextGeneration {
            fn apply_cached_repeat_penalty(
                &mut self,
@@ -379,16 +382,17 @@ mod tests {
                Ok((result, elapsed))
            }
        }
-        
+
        let mut mock_gen = MockTextGeneration {
            repeat_penalty: 2.0,
            repeat_last_n: 3,
            penalty_cache: HashMap::new(),
        };
-        
-        let (result_logits, _duration) = mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
+
+        let (result_logits, _duration) =
+            mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
        let result_data = result_logits.to_vec1::<f32>()?;
-        
+
        // With empty tokens, logits should be unchanged
        assert_eq!(result_data, logits_data);
        Ok(())
@@ -401,13 +405,13 @@ mod tests {
        let logits_data = vec![1.0f32, 2.0, 3.0];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens = vec![1u32, 5u32, 10u32]; // Token 5 and 10 are out of bounds
-        
+
        struct MockTextGeneration {
            repeat_penalty: f32,
            repeat_last_n: usize,
            penalty_cache: HashMap<usize, f32>,
        }
-        
+
        impl MockTextGeneration {
            fn apply_cached_repeat_penalty(
                &mut self,
@@ -448,16 +452,17 @@ mod tests {
                Ok((result, elapsed))
            }
        }
-        
+
        let mut mock_gen = MockTextGeneration {
            repeat_penalty: 2.0,
            repeat_last_n: 3,
            penalty_cache: HashMap::new(),
        };
-        
-        let (result_logits, _duration) = mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
+
+        let (result_logits, _duration) =
+            mock_gen.apply_cached_repeat_penalty(logits.clone(), &tokens)?;
        let result_data = result_logits.to_vec1::<f32>()?;
-        
+
        // Only token 1 should be penalized, out-of-bounds tokens should be ignored
        let expected = vec![1.0f32, 1.0, 3.0]; // [1.0, 2.0/2.0, 3.0]
        assert_eq!(result_data, expected);
@@ -471,52 +476,52 @@ mod tests {
        // Since creating a real TextGeneration instance requires a Model which needs model weights,
        // we'll create a test that demonstrates the method is now public and can be accessed.
        // The comprehensive functionality testing is already covered by the mock tests above.
-        
+
        // Test data setup
        let device = Device::Cpu;
        let logits_data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens = vec![1u32, 2u32, 3u32];
-        
+
        // Test that we can create the necessary components
        let tokenizer = create_test_tokenizer()?;
-        
+
        // The method is now public as confirmed by making it pub fn apply_cached_repeat_penalty
        // This test verifies the method signature and that it's accessible from external code
-        
+
        // We could create a TextGeneration instance if we had a way to mock the Model,
        // but for now we confirm that the existing mock tests cover the functionality
        // and the method is properly exposed as public
-        
+
        println!("apply_cached_repeat_penalty method is now public and accessible for testing");
        assert!(true);
        Ok(())
    }
-    
+
    // Integration test that demonstrates the method usage pattern
-    #[test] 
+    #[test]
    fn test_apply_cached_repeat_penalty_usage_pattern() -> Result<()> {
        // This test demonstrates how the apply_cached_repeat_penalty method would be used
        // in practice, even though we can't create a full TextGeneration instance in unit tests
-        
+
        let device = Device::Cpu;
        let logits_data = vec![1.5f32, 2.5, 3.5, 4.5, 5.5];
        let logits = Tensor::new(&logits_data[..], &device)?;
        let tokens = vec![1u32, 2u32, 1u32, 3u32]; // Repeated token 1 to test caching
-        
+
        // Test parameters that would be used with TextGeneration
        let repeat_penalty = 1.2f32;
        let repeat_last_n = 3usize;
        let mut penalty_cache: HashMap<usize, f32> = HashMap::new();
-        
+
        // Simulate the method's logic to verify it works as expected
        let start_time = std::time::Instant::now();
-        
+
        if repeat_penalty != 1.0 {
            let start_at = tokens.len().saturating_sub(repeat_last_n);
            let penalty_tokens = &tokens[start_at..];
            let mut logits_vec = logits.to_vec1::<f32>()?;
-            
+
            for &token_id in penalty_tokens {
                let token_id = token_id as usize;
                if token_id < logits_vec.len() {
@@ -531,14 +536,14 @@ mod tests {
                }
            }
        }
-        
+
        let _duration = start_time.elapsed();
-        
+
        // Verify that tokens were processed correctly
        assert!(penalty_cache.contains_key(&1)); // Token 1 should be cached
-        assert!(penalty_cache.contains_key(&2)); // Token 2 should be cached  
+        assert!(penalty_cache.contains_key(&2)); // Token 2 should be cached
        assert!(penalty_cache.contains_key(&3)); // Token 3 should be cached
-        
+
        println!("Successfully demonstrated apply_cached_repeat_penalty usage pattern");
        Ok(())
    }
--- a/crates/inference-engine/tests/token_output_stream_tests.rs
+++ b/crates/inference-engine/tests/token_output_stream_tests.rs
@@ -1,7 +1,7 @@
-use inference_engine::token_output_stream::TokenOutputStream;
-use tokenizers::Tokenizer;
-use std::path::PathBuf;
 use anyhow::Result;
+use inference_engine::token_output_stream::TokenOutputStream;
+use std::path::PathBuf;
+use tokenizers::Tokenizer;

 #[cfg(test)]
 mod tests {
@@ -19,7 +19,7 @@ mod tests {
    fn test_new_token_output_stream() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let token_stream = TokenOutputStream::new(tokenizer);
-        
+
        // Check that the token stream was created successfully
        assert!(token_stream.tokenizer().get_vocab(true).len() > 0);
        Ok(())
@@ -29,18 +29,18 @@ mod tests {
    fn test_clear() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);
-        
+
        // Add a token
        let token_id = token_stream.get_token("<eos>").unwrap();
        token_stream.next_token(token_id)?;
-        
+
        // Clear the stream
        token_stream.clear();
-        
+
        // Check that the stream is empty by trying to decode all
        let decoded = token_stream.decode_all()?;
        assert_eq!(decoded, "");
-        
+
        Ok(())
    }

@@ -48,15 +48,15 @@ mod tests {
    fn test_get_token() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let token_stream = TokenOutputStream::new(tokenizer);
-        
+
        // Get a token that should exist
        let eos_token = token_stream.get_token("<eos>");
        assert!(eos_token.is_some());
-        
+
        // Get a token that shouldn't exist
        let nonexistent_token = token_stream.get_token("<this_token_does_not_exist>");
        assert!(nonexistent_token.is_none());
-        
+
        Ok(())
    }

@@ -64,11 +64,14 @@ mod tests {
    fn test_next_token_and_decode() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);
-        
+
        // Get some tokens
-        let hello_tokens = token_stream.tokenizer().encode("Hello world", true).unwrap();
+        let hello_tokens = token_stream
+            .tokenizer()
+            .encode("Hello world", true)
+            .unwrap();
        let token_ids = hello_tokens.get_ids();
-        
+
        // Add tokens one by one
        let mut output = String::new();
        for &token_id in token_ids {
@@ -76,16 +79,16 @@ mod tests {
                output.push_str(&text);
            }
        }
-        
+
        // Get any remaining text
        if let Some(rest) = token_stream.decode_rest()? {
            output.push_str(&rest);
        }
-        
+
        // Check the output
        assert!(!output.is_empty());
        assert_eq!(output.trim(), "Hello world");
-        
+
        Ok(())
    }

@@ -93,22 +96,25 @@ mod tests {
    fn test_decode_all() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let mut token_stream = TokenOutputStream::new(tokenizer);
-        
+
        // Get some tokens
-        let hello_tokens = token_stream.tokenizer().encode("Hello world", true).unwrap();
+        let hello_tokens = token_stream
+            .tokenizer()
+            .encode("Hello world", true)
+            .unwrap();
        let token_ids = hello_tokens.get_ids();
-        
+
        // Add tokens one by one
        for &token_id in token_ids {
            token_stream.next_token(token_id)?;
        }
-        
+
        // Decode all
        let decoded = token_stream.decode_all()?;
-        
+
        // Check the output
        assert_eq!(decoded.trim(), "Hello world");
-        
+
        Ok(())
    }

@@ -116,14 +122,14 @@ mod tests {
    fn test_into_inner() -> Result<()> {
        let tokenizer = create_test_tokenizer()?;
        let token_stream = TokenOutputStream::new(tokenizer);
-        
+
        // Get the inner tokenizer
        let inner_tokenizer = token_stream.into_inner();
-        
+
        // Check that the inner tokenizer works
        let encoded = inner_tokenizer.encode("Test", true).unwrap();
        assert!(encoded.get_ids().len() > 0);
-        
+
        Ok(())
    }
-}
+}