run cargo fmt

2025-09-08 22:46:44 +00:00 · 2025-09-04 13:45:25 -04:00
parent 1e02b12cda
commit c1c583faab
11 changed files with 241 additions and 170 deletions
--- a/integration/llama-runner/src/llama_api.rs
+++ b/integration/llama-runner/src/llama_api.rs
@@ -102,7 +102,7 @@ impl Default for LlamaInferenceConfig {
            max_tokens: 512,

            // Performance flags
-            no_kv_cache: false,   // keep cache ON for speed
+            no_kv_cache: false,    // keep cache ON for speed
            use_flash_attn: false, // great speed boost if supported

            // Precision: bf16 is a good default on Ampere+; fallback to fp16 if needed.