mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-12 00:37:04 +00:00
Implement Go-based test runner framework for Tesla K80 testing
Add comprehensive test orchestration framework: Test Runner (cmd/test-runner/): - config.go: YAML configuration loading and validation - server.go: Ollama server lifecycle management (start/stop/health checks) - monitor.go: Real-time log monitoring with pattern matching - test.go: Model testing via Ollama API (pull, chat, validation) - validate.go: Test result validation (GPU usage, response quality, log analysis) - report.go: Structured reporting (JSON and Markdown formats) - main.go: CLI interface with run/validate/list commands Test Configurations (test/config/): - models.yaml: Full test suite with quick/full/stress profiles - quick.yaml: Fast smoke test with gemma2:2b Updated Workflow: - tesla-k80-tests.yml: Use test-runner instead of shell scripts - Run quick tests first, then full tests if passing - Generate structured JSON reports for pass/fail checking - Upload test results as artifacts Features: - Multi-model testing with configurable profiles - API-based testing (not CLI commands) - Real-time log monitoring for GPU events and errors - Automatic validation of GPU loading and response quality - Structured JSON and Markdown reports - Graceful server lifecycle management - Interrupt handling (Ctrl+C cleanup) Addresses limitations of shell-based testing by providing: - Better error handling and reporting - Programmatic test orchestration - Reusable test framework - Clear pass/fail criteria - Detailed test metrics and timing
This commit is contained in:
164
cmd/test-runner/validate.go
Normal file
164
cmd/test-runner/validate.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Validator validates test results against configuration
type Validator struct {
	config     Validation  // validation settings (e.g. GPURequired, SingleGPUPreferred)
	logMonitor *LogMonitor // source of captured server log events; may be nil (log checks are then skipped)
}
|
||||
|
||||
// NewValidator creates a new validator
|
||||
func NewValidator(config Validation, logMonitor *LogMonitor) *Validator {
|
||||
return &Validator{
|
||||
config: config,
|
||||
logMonitor: logMonitor,
|
||||
}
|
||||
}
|
||||
|
||||
// ValidateResult validates a test result
|
||||
func (v *Validator) ValidateResult(result *TestResult) {
|
||||
// Validate prompts
|
||||
for i := range result.PromptTests {
|
||||
v.validatePrompt(&result.PromptTests[i])
|
||||
}
|
||||
|
||||
// Check logs for errors and warnings
|
||||
if v.logMonitor != nil {
|
||||
v.validateLogs(result)
|
||||
}
|
||||
}
|
||||
|
||||
// validatePrompt validates a single prompt test
|
||||
func (v *Validator) validatePrompt(prompt *PromptTest) {
|
||||
// Already failed, skip
|
||||
if prompt.Status == StatusFailed {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if response is empty
|
||||
if strings.TrimSpace(prompt.Response) == "" {
|
||||
prompt.Status = StatusFailed
|
||||
prompt.ErrorMessage = "Response is empty"
|
||||
return
|
||||
}
|
||||
|
||||
// Check token count
|
||||
if prompt.ResponseTokens < 1 {
|
||||
prompt.Status = StatusFailed
|
||||
prompt.ErrorMessage = "Response has no tokens"
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// validateLogs validates log events
|
||||
func (v *Validator) validateLogs(result *TestResult) {
|
||||
// Check for error events
|
||||
errorEvents := v.logMonitor.GetEvents(EventError)
|
||||
if len(errorEvents) > 0 {
|
||||
result.Status = StatusFailed
|
||||
errorMessages := make([]string, len(errorEvents))
|
||||
for i, event := range errorEvents {
|
||||
errorMessages[i] = event.Line
|
||||
}
|
||||
if result.ErrorMessage == "" {
|
||||
result.ErrorMessage = fmt.Sprintf("Errors found in logs: %s", strings.Join(errorMessages, "; "))
|
||||
} else {
|
||||
result.ErrorMessage += fmt.Sprintf("; Log errors: %s", strings.Join(errorMessages, "; "))
|
||||
}
|
||||
}
|
||||
|
||||
// Check for warning events
|
||||
warningEvents := v.logMonitor.GetEvents(EventWarning)
|
||||
if len(warningEvents) > 0 {
|
||||
warnings := make([]string, len(warningEvents))
|
||||
for i, event := range warningEvents {
|
||||
warnings[i] = event.Line
|
||||
}
|
||||
result.Warnings = append(result.Warnings, warnings...)
|
||||
}
|
||||
|
||||
// Check if GPU was used (if required)
|
||||
if v.config.GPURequired {
|
||||
if !v.hasGPULoading() {
|
||||
result.Status = StatusFailed
|
||||
if result.ErrorMessage == "" {
|
||||
result.ErrorMessage = "GPU acceleration not detected in logs (GPU required)"
|
||||
} else {
|
||||
result.ErrorMessage += "; GPU acceleration not detected"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for CPU fallback (if single GPU preferred)
|
||||
if v.config.SingleGPUPreferred {
|
||||
if v.hasCPUFallback() {
|
||||
warning := "CPU fallback or multi-GPU split detected (single GPU preferred)"
|
||||
result.Warnings = append(result.Warnings, warning)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// hasGPULoading checks if logs indicate GPU loading
|
||||
func (v *Validator) hasGPULoading() bool {
|
||||
successEvents := v.logMonitor.GetEvents(EventSuccess)
|
||||
|
||||
// Look for patterns indicating GPU usage
|
||||
gpuPatterns := []string{
|
||||
"offload",
|
||||
"GPU",
|
||||
"CUDA",
|
||||
}
|
||||
|
||||
for _, event := range successEvents {
|
||||
line := strings.ToLower(event.Line)
|
||||
for _, pattern := range gpuPatterns {
|
||||
if strings.Contains(line, strings.ToLower(pattern)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// hasCPUFallback checks if logs indicate CPU fallback
|
||||
func (v *Validator) hasCPUFallback() bool {
|
||||
allEvents := v.logMonitor.GetAllEvents()
|
||||
|
||||
// Look for patterns indicating CPU usage or multi-GPU split
|
||||
cpuPatterns := []string{
|
||||
"CPU backend",
|
||||
"using CPU",
|
||||
"fallback",
|
||||
}
|
||||
|
||||
for _, event := range allEvents {
|
||||
line := strings.ToLower(event.Line)
|
||||
for _, pattern := range cpuPatterns {
|
||||
if strings.Contains(line, strings.ToLower(pattern)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// ValidateResponse validates a response against expected criteria
|
||||
func ValidateResponse(response string, minTokens, maxTokens int) error {
|
||||
tokens := estimateTokens(response)
|
||||
|
||||
if minTokens > 0 && tokens < minTokens {
|
||||
return fmt.Errorf("response too short: %d tokens (min: %d)", tokens, minTokens)
|
||||
}
|
||||
|
||||
if maxTokens > 0 && tokens > maxTokens {
|
||||
return fmt.Errorf("response too long: %d tokens (max: %d)", tokens, maxTokens)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user