mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 00:07:07 +00:00
Add comprehensive test orchestration framework: Test Runner (cmd/test-runner/): - config.go: YAML configuration loading and validation - server.go: Ollama server lifecycle management (start/stop/health checks) - monitor.go: Real-time log monitoring with pattern matching - test.go: Model testing via Ollama API (pull, chat, validation) - validate.go: Test result validation (GPU usage, response quality, log analysis) - report.go: Structured reporting (JSON and Markdown formats) - main.go: CLI interface with run/validate/list commands Test Configurations (test/config/): - models.yaml: Full test suite with quick/full/stress profiles - quick.yaml: Fast smoke test with gemma2:2b Updated Workflow: - tesla-k80-tests.yml: Use test-runner instead of shell scripts - Run quick tests first, then full tests if passing - Generate structured JSON reports for pass/fail checking - Upload test results as artifacts Features: - Multi-model testing with configurable profiles - API-based testing (not CLI commands) - Real-time log monitoring for GPU events and errors - Automatic validation of GPU loading and response quality - Structured JSON and Markdown reports - Graceful server lifecycle management - Interrupt handling (Ctrl+C cleanup) Addresses limitations of shell-based testing by providing: - Better error handling and reporting - Programmatic test orchestration - Reusable test framework - Clear pass/fail criteria - Detailed test metrics and timing
224 lines
6.0 KiB
Go
224 lines
6.0 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// TestResult represents the result of a model test
type TestResult struct {
	ModelName    string        `json:"model_name"`              // model under test, as given in the test config
	Status       TestStatus    `json:"status"`                  // overall status; FAILED if the pull or any prompt failed
	StartTime    time.Time     `json:"start_time"`              // when TestModel began
	EndTime      time.Time     `json:"end_time"`                // when TestModel finished (set on every exit path)
	Duration     time.Duration `json:"duration"`                // EndTime - StartTime
	PromptTests  []PromptTest  `json:"prompt_tests"`            // per-prompt results, in execution order
	ErrorMessage string        `json:"error_message,omitempty"` // set when the model pull itself failed
	Warnings     []string      `json:"warnings,omitempty"`      // non-fatal notes; not populated in this file — presumably filled by validation elsewhere
}
|
|
|
|
// TestStatus represents the status of a test
type TestStatus string

// Recognized test statuses. StatusSkipped is declared for reporting but
// is not assigned anywhere in this file.
const (
	StatusPassed  TestStatus = "PASSED"
	StatusFailed  TestStatus = "FAILED"
	StatusSkipped TestStatus = "SKIPPED"
)
|
|
|
|
// PromptTest represents the result of a single prompt test
type PromptTest struct {
	Prompt         string        `json:"prompt"`                  // prompt text sent to the model
	Response       string        `json:"response"`                // full (non-streamed) model response
	ResponseTokens int           `json:"response_tokens"`         // rough token estimate; see estimateTokens
	Duration       time.Duration `json:"duration"`                // wall-clock time of this prompt's request
	Status         TestStatus    `json:"status"`                  // PASSED unless the request errored or timed out
	ErrorMessage   string        `json:"error_message,omitempty"` // error text when Status is FAILED
}
|
|
|
|
// ModelTester runs tests for models
type ModelTester struct {
	serverURL  string       // base URL of the Ollama server; "/api/..." paths are appended directly, so no trailing slash
	httpClient *http.Client // shared client with a long timeout for slow model operations
}
|
|
|
|
// NewModelTester creates a new model tester
|
|
func NewModelTester(serverURL string) *ModelTester {
|
|
return &ModelTester{
|
|
serverURL: serverURL,
|
|
httpClient: &http.Client{
|
|
Timeout: 5 * time.Minute, // Long timeout for model operations
|
|
},
|
|
}
|
|
}
|
|
|
|
// TestModel runs all tests for a single model
|
|
func (t *ModelTester) TestModel(ctx context.Context, modelTest ModelTest) TestResult {
|
|
result := TestResult{
|
|
ModelName: modelTest.Name,
|
|
StartTime: time.Now(),
|
|
Status: StatusPassed,
|
|
PromptTests: make([]PromptTest, 0),
|
|
}
|
|
|
|
// Pull model first
|
|
fmt.Printf("Pulling model %s...\n", modelTest.Name)
|
|
if err := t.pullModel(ctx, modelTest.Name); err != nil {
|
|
result.Status = StatusFailed
|
|
result.ErrorMessage = fmt.Sprintf("Failed to pull model: %v", err)
|
|
result.EndTime = time.Now()
|
|
result.Duration = result.EndTime.Sub(result.StartTime)
|
|
return result
|
|
}
|
|
fmt.Printf("Model %s pulled successfully\n", modelTest.Name)
|
|
|
|
// Run each prompt test
|
|
for i, prompt := range modelTest.Prompts {
|
|
fmt.Printf("Testing prompt %d/%d for %s\n", i+1, len(modelTest.Prompts), modelTest.Name)
|
|
|
|
promptTest := t.testPrompt(ctx, modelTest.Name, prompt, modelTest.Timeout)
|
|
result.PromptTests = append(result.PromptTests, promptTest)
|
|
|
|
// Update overall status based on prompt test result
|
|
if promptTest.Status == StatusFailed {
|
|
result.Status = StatusFailed
|
|
}
|
|
}
|
|
|
|
result.EndTime = time.Now()
|
|
result.Duration = result.EndTime.Sub(result.StartTime)
|
|
|
|
fmt.Printf("Model %s test completed: %s\n", modelTest.Name, result.Status)
|
|
return result
|
|
}
|
|
|
|
// pullModel pulls a model using the Ollama API
|
|
func (t *ModelTester) pullModel(ctx context.Context, modelName string) error {
|
|
url := t.serverURL + "/api/pull"
|
|
|
|
reqBody := map[string]interface{}{
|
|
"name": modelName,
|
|
}
|
|
|
|
jsonData, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal request: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := t.httpClient.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return fmt.Errorf("pull failed with status %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
// Read response stream (pull progress)
|
|
scanner := bufio.NewScanner(resp.Body)
|
|
for scanner.Scan() {
|
|
var progress map[string]interface{}
|
|
if err := json.Unmarshal(scanner.Bytes(), &progress); err != nil {
|
|
continue
|
|
}
|
|
// Could print progress here if verbose mode is enabled
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// testPrompt tests a single prompt
|
|
func (t *ModelTester) testPrompt(ctx context.Context, modelName, prompt string, timeout time.Duration) PromptTest {
|
|
result := PromptTest{
|
|
Prompt: prompt,
|
|
Status: StatusPassed,
|
|
}
|
|
|
|
startTime := time.Now()
|
|
|
|
// Create context with timeout
|
|
testCtx, cancel := context.WithTimeout(ctx, timeout)
|
|
defer cancel()
|
|
|
|
// Call chat API
|
|
response, err := t.chat(testCtx, modelName, prompt)
|
|
if err != nil {
|
|
result.Status = StatusFailed
|
|
result.ErrorMessage = err.Error()
|
|
result.Duration = time.Since(startTime)
|
|
return result
|
|
}
|
|
|
|
result.Response = response
|
|
result.ResponseTokens = estimateTokens(response)
|
|
result.Duration = time.Since(startTime)
|
|
|
|
return result
|
|
}
|
|
|
|
// chat sends a chat request to the ollama API
|
|
func (t *ModelTester) chat(ctx context.Context, modelName, prompt string) (string, error) {
|
|
url := t.serverURL + "/api/generate"
|
|
|
|
reqBody := map[string]interface{}{
|
|
"model": modelName,
|
|
"prompt": prompt,
|
|
"stream": false,
|
|
}
|
|
|
|
jsonData, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to marshal request: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to create request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := t.httpClient.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return "", fmt.Errorf("chat failed with status %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var response struct {
|
|
Response string `json:"response"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
|
|
return "", fmt.Errorf("failed to decode response: %w", err)
|
|
}
|
|
|
|
return response.Response, nil
|
|
}
|
|
|
|
// estimateTokens estimates the number of tokens in a text.
//
// This is a rough approximation using the common heuristic that one
// token corresponds to about four characters of English text, rounding
// up so any non-empty text counts as at least one token. (The previous
// implementation returned a plain word count, which contradicted the
// documented chars/4 heuristic and undercounts tokens, since English
// averages roughly 0.75 words per token.) Byte length is used as the
// character count; for non-ASCII text this over-counts slightly, which
// is acceptable for a rough estimate.
func estimateTokens(text string) int {
	if strings.TrimSpace(text) == "" {
		return 0 // empty or whitespace-only responses carry no tokens
	}
	return (len(text) + 3) / 4
}
|