Files
ollama37/cmd/test-runner/test.go
Shang Chieh Tseng d59284d30a Implement Go-based test runner framework for Tesla K80 testing
Add comprehensive test orchestration framework:

Test Runner (cmd/test-runner/):
- config.go: YAML configuration loading and validation
- server.go: Ollama server lifecycle management (start/stop/health checks)
- monitor.go: Real-time log monitoring with pattern matching
- test.go: Model testing via Ollama API (pull, chat, validation)
- validate.go: Test result validation (GPU usage, response quality, log analysis)
- report.go: Structured reporting (JSON and Markdown formats)
- main.go: CLI interface with run/validate/list commands

Test Configurations (test/config/):
- models.yaml: Full test suite with quick/full/stress profiles
- quick.yaml: Fast smoke test with gemma2:2b

Updated Workflow:
- tesla-k80-tests.yml: Use test-runner instead of shell scripts
- Run quick tests first, then full tests if passing
- Generate structured JSON reports for pass/fail checking
- Upload test results as artifacts

Features:
- Multi-model testing with configurable profiles
- API-based testing (not CLI commands)
- Real-time log monitoring for GPU events and errors
- Automatic validation of GPU loading and response quality
- Structured JSON and Markdown reports
- Graceful server lifecycle management
- Interrupt handling (Ctrl+C cleanup)

Addresses limitations of shell-based testing by providing:
- Better error handling and reporting
- Programmatic test orchestration
- Reusable test framework
- Clear pass/fail criteria
- Detailed test metrics and timing
2025-10-30 11:04:48 +08:00

224 lines
6.0 KiB
Go

package main
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// TestResult represents the result of a model test
type TestResult struct {
ModelName string `json:"model_name"`
Status TestStatus `json:"status"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Duration time.Duration `json:"duration"`
PromptTests []PromptTest `json:"prompt_tests"`
ErrorMessage string `json:"error_message,omitempty"`
Warnings []string `json:"warnings,omitempty"`
}
// TestStatus represents the status of a test
type TestStatus string

// Test outcomes, used both for whole-model results and per-prompt results.
const (
	StatusPassed  TestStatus = "PASSED"
	StatusFailed  TestStatus = "FAILED"
	StatusSkipped TestStatus = "SKIPPED" // never assigned in this file; presumably set by the runner — confirm
)
// PromptTest represents the result of a single prompt test
type PromptTest struct {
	Prompt   string `json:"prompt"`
	Response string `json:"response"` // full (non-streamed) model response
	// ResponseTokens is a rough estimate (see estimateTokens), not an
	// exact tokenizer count.
	ResponseTokens int           `json:"response_tokens"`
	Duration       time.Duration `json:"duration"` // wall-clock time of the chat call (JSON: nanoseconds)
	Status         TestStatus    `json:"status"`
	ErrorMessage   string        `json:"error_message,omitempty"` // set when the chat call failed or timed out
}
// ModelTester runs tests for models
type ModelTester struct {
	// serverURL is the base URL of the Ollama server; API paths like
	// "/api/pull" are appended directly, so it must not end with "/".
	serverURL string
	// httpClient is shared across requests and carries a long timeout
	// to accommodate slow model operations.
	httpClient *http.Client
}
// NewModelTester creates a new model tester
func NewModelTester(serverURL string) *ModelTester {
	// Pulling a model or generating a first response can be slow, so the
	// shared client carries a generous per-request timeout.
	client := &http.Client{Timeout: 5 * time.Minute}

	return &ModelTester{
		serverURL:  serverURL,
		httpClient: client,
	}
}
// TestModel runs all tests for a single model
func (t *ModelTester) TestModel(ctx context.Context, modelTest ModelTest) TestResult {
	res := TestResult{
		ModelName:   modelTest.Name,
		StartTime:   time.Now(),
		Status:      StatusPassed,
		PromptTests: make([]PromptTest, 0),
	}

	// finish stamps the end time and computed duration before returning.
	finish := func() TestResult {
		res.EndTime = time.Now()
		res.Duration = res.EndTime.Sub(res.StartTime)
		return res
	}

	// The model must be available locally before any prompt can run.
	fmt.Printf("Pulling model %s...\n", modelTest.Name)
	if err := t.pullModel(ctx, modelTest.Name); err != nil {
		res.Status = StatusFailed
		res.ErrorMessage = fmt.Sprintf("Failed to pull model: %v", err)
		return finish()
	}
	fmt.Printf("Model %s pulled successfully\n", modelTest.Name)

	// Run every configured prompt; one failure fails the whole model,
	// but the remaining prompts still run so the report is complete.
	total := len(modelTest.Prompts)
	for idx, p := range modelTest.Prompts {
		fmt.Printf("Testing prompt %d/%d for %s\n", idx+1, total, modelTest.Name)
		pt := t.testPrompt(ctx, modelTest.Name, p, modelTest.Timeout)
		res.PromptTests = append(res.PromptTests, pt)
		if pt.Status == StatusFailed {
			res.Status = StatusFailed
		}
	}

	out := finish()
	fmt.Printf("Model %s test completed: %s\n", modelTest.Name, out.Status)
	return out
}
// pullModel pulls a model using the Ollama API.
//
// The /api/pull endpoint streams newline-delimited JSON progress objects.
// A failure can surface two ways: a non-200 HTTP status, or an "error"
// field inside a streamed progress object delivered with HTTP 200; both
// are returned as errors.
func (t *ModelTester) pullModel(ctx context.Context, modelName string) error {
	url := t.serverURL + "/api/pull"
	reqBody := map[string]interface{}{
		"name": modelName,
	}
	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return fmt.Errorf("failed to marshal request: %w", err)
	}
	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := t.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("pull failed with status %d: %s", resp.StatusCode, string(body))
	}
	// Read response stream (pull progress).
	scanner := bufio.NewScanner(resp.Body)
	// Progress lines are normally small, but raise the Scanner's default
	// 64 KiB token limit to 1 MiB so one long line cannot abort the scan.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		var progress map[string]interface{}
		if err := json.Unmarshal(scanner.Bytes(), &progress); err != nil {
			continue // tolerate non-JSON noise in the stream
		}
		// Mid-stream failures arrive as {"error": "..."} with HTTP 200,
		// so each progress object must be inspected.
		if msg, ok := progress["error"].(string); ok && msg != "" {
			return fmt.Errorf("pull failed: %s", msg)
		}
		// Could print progress here if verbose mode is enabled
	}
	// Without this check a truncated or aborted stream was silently
	// treated as a successful pull.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading pull stream: %w", err)
	}
	return nil
}
// testPrompt tests a single prompt
func (t *ModelTester) testPrompt(ctx context.Context, modelName, prompt string, timeout time.Duration) PromptTest {
	pt := PromptTest{
		Prompt: prompt,
		Status: StatusPassed,
	}
	started := time.Now()

	// Bound this single prompt by its configured timeout.
	testCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	answer, err := t.chat(testCtx, modelName, prompt)
	if err != nil {
		pt.Status = StatusFailed
		pt.ErrorMessage = err.Error()
		pt.Duration = time.Since(started)
		return pt
	}

	pt.Response = answer
	pt.ResponseTokens = estimateTokens(answer)
	pt.Duration = time.Since(started)
	return pt
}
// chat sends a chat request to the ollama API
func (t *ModelTester) chat(ctx context.Context, modelName, prompt string) (string, error) {
	// Non-streaming generate call: the full response arrives in a single
	// JSON object.
	payload, err := json.Marshal(map[string]interface{}{
		"model":  modelName,
		"prompt": prompt,
		"stream": false,
	})
	if err != nil {
		return "", fmt.Errorf("failed to marshal request: %w", err)
	}

	endpoint := t.serverURL + "/api/generate"
	req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewBuffer(payload))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("chat failed with status %d: %s", resp.StatusCode, string(body))
	}

	// Only the "response" field is needed from the reply.
	var reply struct {
		Response string `json:"response"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&reply); err != nil {
		return "", fmt.Errorf("failed to decode response: %w", err)
	}
	return reply.Response, nil
}
// estimateTokens estimates the number of tokens in a text.
// This is a rough approximation: it takes the larger of the
// whitespace-delimited word count and the characters/4 heuristic
// (English text averages roughly 4 characters per token). The previous
// version counted only words while its comment described the chars/4
// rule, undercounting text with long or unbroken words.
func estimateTokens(text string) int {
	words := len(strings.Fields(text))
	byChars := (len(text) + 3) / 4 // ceil(len/4)
	if byChars > words {
		return byChars
	}
	return words
}