Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-18 03:37:09 +00:00)
Add GitHub Actions CI/CD pipeline and test framework
- Add .github/workflows/build-test.yml for automated testing
- Add tests/ directory with TypeScript test runner
- Add docs/CICD.md documentation
- Remove .gitlab-ci.yml (migrated to GitHub Actions)
- Update .gitignore for test artifacts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
tests/package-lock.json (generated, new file, 1426 lines)
File diff suppressed because it is too large.
tests/package.json (new file, 33 lines)
@@ -0,0 +1,33 @@
{
  "name": "ollama37-test-runner",
  "version": "1.0.0",
  "description": "Scalable test runner with LLM-as-judge for ollama37",
  "type": "module",
  "main": "dist/index.js",
  "bin": {
    "ollama37-test": "dist/cli.js"
  },
  "scripts": {
    "build": "tsc",
    "start": "node dist/cli.js",
    "dev": "tsx src/cli.ts",
    "test": "tsx src/cli.ts run",
    "test:build": "tsx src/cli.ts run --suite build",
    "test:runtime": "tsx src/cli.ts run --suite runtime",
    "test:inference": "tsx src/cli.ts run --suite inference"
  },
  "dependencies": {
    "axios": "^1.7.2",
    "chalk": "^5.3.0",
    "commander": "^12.1.0",
    "glob": "^10.3.10",
    "js-yaml": "^4.1.0",
    "p-limit": "^5.0.0"
  },
  "devDependencies": {
    "@types/js-yaml": "^4.0.9",
    "@types/node": "^20.14.0",
    "tsx": "^4.16.0",
    "typescript": "^5.5.0"
  }
}
tests/src/cli.ts (new file, 165 lines)
@@ -0,0 +1,165 @@
#!/usr/bin/env node

import { Command } from 'commander'
import { writeFileSync } from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import { TestLoader } from './loader.js'
import { TestExecutor } from './executor.js'
import { LLMJudge } from './judge.js'
import { Reporter, TestLinkReporter } from './reporter.js'
import { RunnerOptions } from './types.js'

const __dirname = path.dirname(fileURLToPath(import.meta.url))
const defaultTestcasesDir = path.join(__dirname, '..', 'testcases')

const program = new Command()

program
  .name('ollama37-test')
  .description('Scalable test runner with LLM-as-judge for ollama37')
  .version('1.0.0')

program
  .command('run')
  .description('Run test cases')
  .option('-s, --suite <suite>', 'Run only tests in specified suite (build, runtime, inference)')
  .option('-i, --id <id>', 'Run only specified test case by ID')
  .option('-w, --workers <n>', 'Number of parallel workers', '1')
  .option('-d, --dry-run', 'Show what would be executed without running')
  .option('-o, --output <format>', 'Output format: console, json, junit', 'console')
  .option('--report-testlink', 'Report results to TestLink')
  .option('--ollama-url <url>', 'Ollama server URL', 'http://localhost:11434')
  .option('--ollama-model <model>', 'Ollama model for judging', 'gemma3:4b')
  .option('--testlink-url <url>', 'TestLink server URL', 'http://localhost:8090')
  .option('--testlink-api-key <key>', 'TestLink API key')
  .option('--no-llm', 'Skip LLM judging, use simple exit code check')
  .option('--testcases-dir <dir>', 'Test cases directory', defaultTestcasesDir)
  .action(async (options) => {
    console.log('='.repeat(60))
    console.log('OLLAMA37 TEST RUNNER')
    console.log('='.repeat(60))

    const loader = new TestLoader(options.testcasesDir)
    const executor = new TestExecutor(path.join(__dirname, '..', '..'))
    const judge = new LLMJudge(options.ollamaUrl, options.ollamaModel)

    // Load test cases
    console.log('\nLoading test cases...')
    let testCases = await loader.loadAll()

    if (options.suite) {
      testCases = testCases.filter(tc => tc.suite === options.suite)
      console.log(`  Filtered by suite: ${options.suite}`)
    }

    if (options.id) {
      testCases = testCases.filter(tc => tc.id === options.id)
      console.log(`  Filtered by ID: ${options.id}`)
    }

    // Sort by dependencies
    testCases = loader.sortByDependencies(testCases)

    console.log(`  Found ${testCases.length} test cases`)

    if (testCases.length === 0) {
      console.log('\nNo test cases found!')
      process.exit(1)
    }

    // Dry run
    if (options.dryRun) {
      console.log('\nDRY RUN - Would execute:')
      for (const tc of testCases) {
        console.log(`  ${tc.id}: ${tc.name}`)
        for (const step of tc.steps) {
          console.log(`    - ${step.name}: ${step.command}`)
        }
      }
      process.exit(0)
    }

    // Execute tests
    console.log('\nExecuting tests...')
    const workers = parseInt(options.workers)
    const results = await executor.executeAll(testCases, workers)

    // Judge results
    console.log('\nJudging results...')
    let judgments
    if (options.llm === false) {
      console.log('  Using simple exit code check (--no-llm)')
      judgments = results.map(r => judge.simpleJudge(r))
    } else {
      try {
        judgments = await judge.judgeResults(results)
      } catch (error) {
        console.error('  LLM judging failed, falling back to simple check:', error)
        judgments = results.map(r => judge.simpleJudge(r))
      }
    }

    // Create reports
    const reports = Reporter.createReports(results, judgments)

    // Output results
    switch (options.output) {
      case 'json': {
        const json = Reporter.toJSON(reports)
        console.log(json)
        writeFileSync('test-results.json', json)
        console.log('\nResults written to test-results.json')
        break
      }

      case 'junit': {
        const junit = Reporter.toJUnit(reports)
        writeFileSync('test-results.xml', junit)
        console.log('\nResults written to test-results.xml')
        break
      }

      case 'console':
      default:
        Reporter.toConsole(reports)
        break
    }

    // Report to TestLink
    if (options.reportTestlink && options.testlinkApiKey) {
      const testlinkReporter = new TestLinkReporter(
        options.testlinkUrl,
        options.testlinkApiKey
      )
      // Would need plan ID and build ID
      // await testlinkReporter.reportResults(reports, planId, buildId)
      console.log('\nTestLink reporting not yet implemented')
    }

    // Exit with appropriate code
    const failed = reports.filter(r => !r.pass).length
    process.exit(failed > 0 ? 1 : 0)
  })

program
  .command('list')
  .description('List all test cases')
  .option('--testcases-dir <dir>', 'Test cases directory', defaultTestcasesDir)
  .action(async (options) => {
    const loader = new TestLoader(options.testcasesDir)
    const testCases = await loader.loadAll()

    const grouped = loader.groupBySuite(testCases)

    console.log('Available Test Cases:\n')
    for (const [suite, cases] of grouped) {
      console.log(`${suite.toUpperCase()}:`)
      for (const tc of cases) {
        console.log(`  ${tc.id}: ${tc.name}`)
      }
      console.log()
    }

    console.log(`Total: ${testCases.length} test cases`)
  })

program.parse()
tests/src/executor.ts (new file, 119 lines)
@@ -0,0 +1,119 @@
import { exec } from 'child_process'
import { promisify } from 'util'
import { TestCase, TestResult, StepResult } from './types.js'

const execAsync = promisify(exec)

export class TestExecutor {
  private workingDir: string

  constructor(workingDir: string = process.cwd()) {
    this.workingDir = workingDir
  }

  async executeStep(command: string, timeout: number): Promise<StepResult> {
    const startTime = Date.now()
    let stdout = ''
    let stderr = ''
    let exitCode = 0

    try {
      const result = await execAsync(command, {
        cwd: this.workingDir,
        timeout,
        maxBuffer: 10 * 1024 * 1024, // 10MB buffer
        shell: '/bin/bash'
      })
      stdout = result.stdout
      stderr = result.stderr
    } catch (error: any) {
      stdout = error.stdout || ''
      stderr = error.stderr || error.message || 'Unknown error'
      exitCode = error.code || 1
    }

    const duration = Date.now() - startTime

    return {
      name: '',
      command,
      stdout,
      stderr,
      exitCode,
      duration
    }
  }

  async executeTestCase(testCase: TestCase): Promise<TestResult> {
    const startTime = Date.now()
    const stepResults: StepResult[] = []

    console.log(`  Executing: ${testCase.id} - ${testCase.name}`)

    for (const step of testCase.steps) {
      console.log(`    Step: ${step.name}`)

      const timeout = step.timeout || testCase.timeout
      const result = await this.executeStep(step.command, timeout)
      result.name = step.name

      stepResults.push(result)

      // Log step result
      if (result.exitCode === 0) {
        console.log(`    Exit: ${result.exitCode} (${result.duration}ms)`)
      } else {
        console.log(`    Exit: ${result.exitCode} (FAILED, ${result.duration}ms)`)
      }
    }

    const totalDuration = Date.now() - startTime

    // Combine all logs
    const logs = stepResults.map(r => {
      return `=== Step: ${r.name} ===
Command: ${r.command}
Exit Code: ${r.exitCode}
Duration: ${r.duration}ms

STDOUT:
${r.stdout || '(empty)'}

STDERR:
${r.stderr || '(empty)'}
`
    }).join('\n' + '='.repeat(50) + '\n')

    return {
      testCase,
      steps: stepResults,
      totalDuration,
      logs
    }
  }

  async executeAll(testCases: TestCase[], concurrency: number = 1): Promise<TestResult[]> {
    const results: TestResult[] = []

    if (concurrency === 1) {
      // Sequential execution
      for (const tc of testCases) {
        const result = await this.executeTestCase(tc)
        results.push(result)
      }
    } else {
      // Parallel execution with p-limit
      const pLimit = (await import('p-limit')).default
      const limit = pLimit(concurrency)

      const promises = testCases.map(tc =>
        limit(() => this.executeTestCase(tc))
      )

      const parallelResults = await Promise.all(promises)
      results.push(...parallelResults)
    }

    return results
  }
}
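For orientation, a minimal sketch of driving TestExecutor directly; the inline test case here is hypothetical — the real runner builds these objects from the YAML files via TestLoader:

```typescript
import { TestExecutor } from './executor.js'
import { TestCase } from './types.js'

// Hypothetical one-step smoke test, assembled inline for illustration.
const smoke: TestCase = {
  id: 'TC-DEMO-001',
  name: 'Echo smoke test',
  suite: 'build',
  priority: 1,
  timeout: 5000,
  dependencies: [],
  steps: [{ name: 'echo', command: 'echo hello' }],
  criteria: 'Prints "hello" and exits 0.'
}

const executor = new TestExecutor()
const result = await executor.executeTestCase(smoke)
console.log(result.steps[0].exitCode) // 0 when the command succeeds
```

Note that executeStep never throws: a failing command is captured as a non-zero exitCode plus stderr, so even failed steps produce logs the judge can evaluate.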
tests/src/judge.ts (new file, 146 lines)
@@ -0,0 +1,146 @@
import axios from 'axios'
import { TestResult, Judgment } from './types.js'

export class LLMJudge {
  private ollamaUrl: string
  private model: string
  private batchSize: number

  constructor(ollamaUrl: string = 'http://localhost:11434', model: string = 'gemma3:4b') {
    this.ollamaUrl = ollamaUrl
    this.model = model
    this.batchSize = 5 // Judge 5 tests per LLM call
  }

  private buildPrompt(results: TestResult[]): string {
    const testsSection = results.map((r, i) => {
      return `
### Test ${i + 1}: ${r.testCase.id} - ${r.testCase.name}

**Criteria:**
${r.testCase.criteria}

**Execution Logs:**
\`\`\`
${r.logs.substring(0, 3000)}${r.logs.length > 3000 ? '\n... (truncated)' : ''}
\`\`\`
`
    }).join('\n---\n')

    return `You are a test evaluation judge. Analyze the following test results and determine if each test passed or failed based on the criteria provided.

For each test, examine:
1. The expected criteria
2. The actual execution logs (stdout, stderr, exit codes)
3. Whether the output meets the criteria (use fuzzy matching for AI outputs)

${testsSection}

Respond with a JSON array containing one object per test:
[
  {"testId": "TC-XXX-001", "pass": true, "reason": "Brief explanation"},
  {"testId": "TC-XXX-002", "pass": false, "reason": "Brief explanation"}
]

Important:
- For AI-generated text, accept reasonable variations (e.g., "4", "four", "The answer is 4" are all valid for math questions)
- For build/runtime tests, check exit codes and absence of error messages
- Be lenient with formatting differences, focus on semantic correctness

Respond ONLY with the JSON array, no other text.`
  }

  async judgeResults(results: TestResult[]): Promise<Judgment[]> {
    const allJudgments: Judgment[] = []

    // Process in batches
    for (let i = 0; i < results.length; i += this.batchSize) {
      const batch = results.slice(i, i + this.batchSize)
      console.log(`  Judging batch ${Math.floor(i / this.batchSize) + 1}/${Math.ceil(results.length / this.batchSize)}...`)

      try {
        const judgments = await this.judgeBatch(batch)
        allJudgments.push(...judgments)
      } catch (error) {
        console.error(`  Failed to judge batch:`, error)
        // Mark all tests in batch as failed
        for (const r of batch) {
          allJudgments.push({
            testId: r.testCase.id,
            pass: false,
            reason: 'LLM judgment failed: ' + String(error)
          })
        }
      }
    }

    return allJudgments
  }

  private async judgeBatch(results: TestResult[]): Promise<Judgment[]> {
    const prompt = this.buildPrompt(results)

    const response = await axios.post(`${this.ollamaUrl}/api/generate`, {
      model: this.model,
      prompt,
      stream: false,
      options: {
        temperature: 0.1, // Low temperature for consistent judging
        num_predict: 1000
      }
    }, {
      timeout: 120000 // 2 minute timeout
    })

    const responseText = response.data.response

    // Extract JSON from response
    const jsonMatch = responseText.match(/\[[\s\S]*\]/)
    if (!jsonMatch) {
      throw new Error('No JSON array found in LLM response')
    }

    try {
      const judgments = JSON.parse(jsonMatch[0]) as Judgment[]

      // Validate and fill missing
      const resultIds = results.map(r => r.testCase.id)
      const judgedIds = new Set(judgments.map(j => j.testId))

      // Add missing judgments
      for (const id of resultIds) {
        if (!judgedIds.has(id)) {
          judgments.push({
            testId: id,
            pass: false,
            reason: 'No judgment provided by LLM'
          })
        }
      }

      return judgments
    } catch (parseError) {
      throw new Error(`Failed to parse LLM response: ${responseText.substring(0, 200)}`)
    }
  }

  // Fallback: Simple rule-based judgment (no LLM)
  simpleJudge(result: TestResult): Judgment {
    const allStepsPassed = result.steps.every(s => s.exitCode === 0)

    if (allStepsPassed) {
      return {
        testId: result.testCase.id,
        pass: true,
        reason: 'All steps completed with exit code 0'
      }
    } else {
      const failedSteps = result.steps.filter(s => s.exitCode !== 0)
      return {
        testId: result.testCase.id,
        pass: false,
        reason: `Steps failed: ${failedSteps.map(s => s.name).join(', ')}`
      }
    }
  }
}
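The judge evaluates five results per prompt and marks an entire batch as failed when an LLM call errors; on top of that, the CLI's run command falls back to simpleJudge if judging fails outright. A sketch of that outer fallback, assuming an Ollama server at the default URL:

```typescript
import { LLMJudge } from './judge.js'
import { TestResult, Judgment } from './types.js'

const judge = new LLMJudge('http://localhost:11434', 'gemma3:4b')

// Mirrors the run command's behavior: try batched LLM evaluation,
// fall back to the rule-based check (pass only if every step exited 0).
async function judgeWithFallback(results: TestResult[]): Promise<Judgment[]> {
  try {
    return await judge.judgeResults(results)
  } catch {
    return results.map(r => judge.simpleJudge(r))
  }
}
```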
tests/src/loader.ts (new file, 91 lines)
@@ -0,0 +1,91 @@
import { readFileSync } from 'fs'
import { glob } from 'glob'
import yaml from 'js-yaml'
import path from 'path'
import { TestCase } from './types.js'

export class TestLoader {
  private testcasesDir: string

  constructor(testcasesDir: string = './testcases') {
    this.testcasesDir = testcasesDir
  }

  async loadAll(): Promise<TestCase[]> {
    const pattern = path.join(this.testcasesDir, '**/*.yml')
    const files = await glob(pattern)

    const testCases: TestCase[] = []

    for (const file of files) {
      try {
        const content = readFileSync(file, 'utf-8')
        const testCase = yaml.load(content) as TestCase

        // Set defaults
        testCase.timeout = testCase.timeout || 60000
        testCase.dependencies = testCase.dependencies || []
        testCase.priority = testCase.priority || 1

        testCases.push(testCase)
      } catch (error) {
        console.error(`Failed to load ${file}:`, error)
      }
    }

    return testCases
  }

  async loadBySuite(suite: string): Promise<TestCase[]> {
    const all = await this.loadAll()
    return all.filter(tc => tc.suite === suite)
  }

  async loadById(id: string): Promise<TestCase | undefined> {
    const all = await this.loadAll()
    return all.find(tc => tc.id === id)
  }

  // Sort test cases by dependencies (topological sort)
  sortByDependencies(testCases: TestCase[]): TestCase[] {
    const sorted: TestCase[] = []
    const visited = new Set<string>()
    const idMap = new Map(testCases.map(tc => [tc.id, tc]))

    const visit = (tc: TestCase) => {
      if (visited.has(tc.id)) return
      visited.add(tc.id)

      // Visit dependencies first
      for (const depId of tc.dependencies) {
        const dep = idMap.get(depId)
        if (dep) visit(dep)
      }

      sorted.push(tc)
    }

    // Sort by priority first, then by dependencies
    const byPriority = [...testCases].sort((a, b) => a.priority - b.priority)
    for (const tc of byPriority) {
      visit(tc)
    }

    return sorted
  }

  // Group test cases by suite for parallel execution
  groupBySuite(testCases: TestCase[]): Map<string, TestCase[]> {
    const groups = new Map<string, TestCase[]>()

    for (const tc of testCases) {
      const suite = tc.suite
      if (!groups.has(suite)) {
        groups.set(suite, [])
      }
      groups.get(suite)!.push(tc)
    }

    return groups
  }
}
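sortByDependencies is a depth-first topological sort seeded in priority order, so a dependency always lands ahead of its dependent even when priorities disagree. A small sketch with two hypothetical cases:

```typescript
import { TestLoader } from './loader.js'
import { TestCase } from './types.js'

const base = { suite: 'build', priority: 1, timeout: 60000, steps: [], criteria: '' }
const a: TestCase = { ...base, id: 'TC-A', name: 'A', dependencies: [] }
const b: TestCase = { ...base, id: 'TC-B', name: 'B', dependencies: ['TC-A'] }

const loader = new TestLoader()
// TC-B depends on TC-A, so TC-A is emitted first regardless of input order.
console.log(loader.sortByDependencies([b, a]).map(tc => tc.id)) // ['TC-A', 'TC-B']
```

Unknown dependency IDs are silently skipped (idMap.get returns undefined), and the visited set keeps a dependency cycle from recursing forever, though cycle members then come out in visit order rather than failing loudly.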
tests/src/reporter.ts (new file, 138 lines)
@@ -0,0 +1,138 @@
import axios from 'axios'
import { TestReport, Judgment, TestResult } from './types.js'

export class Reporter {
  // Console reporter
  static toConsole(reports: TestReport[]): void {
    console.log('\n' + '='.repeat(60))
    console.log('TEST RESULTS')
    console.log('='.repeat(60))

    const passed = reports.filter(r => r.pass)
    const failed = reports.filter(r => !r.pass)

    for (const report of reports) {
      const status = report.pass ? '\x1b[32mPASS\x1b[0m' : '\x1b[31mFAIL\x1b[0m'
      console.log(`[${status}] ${report.testId}: ${report.name}`)
      console.log(`  Reason: ${report.reason}`)
      console.log(`  Duration: ${report.duration}ms`)
    }

    console.log('\n' + '-'.repeat(60))
    console.log(`Total: ${reports.length} | Passed: ${passed.length} | Failed: ${failed.length}`)
    console.log('='.repeat(60))
  }

  // JSON reporter
  static toJSON(reports: TestReport[]): string {
    return JSON.stringify({
      summary: {
        total: reports.length,
        passed: reports.filter(r => r.pass).length,
        failed: reports.filter(r => !r.pass).length,
        timestamp: new Date().toISOString()
      },
      results: reports
    }, null, 2)
  }

  // JUnit XML reporter (for CI/CD integration)
  static toJUnit(reports: TestReport[]): string {
    const escapeXml = (s: string) => s
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')

    const testcases = reports.map(r => {
      if (r.pass) {
        return `  <testcase name="${escapeXml(r.testId)}: ${escapeXml(r.name)}" classname="${r.suite}" time="${r.duration / 1000}"/>`
      } else {
        return `  <testcase name="${escapeXml(r.testId)}: ${escapeXml(r.name)}" classname="${r.suite}" time="${r.duration / 1000}">
    <failure message="${escapeXml(r.reason)}">${escapeXml(r.logs.substring(0, 1000))}</failure>
  </testcase>`
      }
    }).join('\n')

    const failures = reports.filter(r => !r.pass).length
    const time = reports.reduce((sum, r) => sum + r.duration, 0) / 1000

    return `<?xml version="1.0" encoding="UTF-8"?>
<testsuite name="ollama37-tests" tests="${reports.length}" failures="${failures}" time="${time}">
${testcases}
</testsuite>`
  }

  // Combine results and judgments into reports
  static createReports(results: TestResult[], judgments: Judgment[]): TestReport[] {
    const judgmentMap = new Map(judgments.map(j => [j.testId, j]))

    return results.map(result => {
      const judgment = judgmentMap.get(result.testCase.id)

      return {
        testId: result.testCase.id,
        name: result.testCase.name,
        suite: result.testCase.suite,
        pass: judgment?.pass ?? false,
        reason: judgment?.reason ?? 'No judgment',
        duration: result.totalDuration,
        logs: result.logs
      }
    })
  }
}

// TestLink reporter
export class TestLinkReporter {
  private url: string
  private apiKey: string

  constructor(url: string, apiKey: string) {
    this.url = url
    this.apiKey = apiKey
  }

  async reportResults(
    reports: TestReport[],
    planId: string,
    buildId: string
  ): Promise<void> {
    console.log('\nReporting to TestLink...')

    for (const report of reports) {
      try {
        await this.reportTestExecution(report, planId, buildId)
        console.log(`  Reported: ${report.testId}`)
      } catch (error) {
        console.error(`  Failed to report ${report.testId}:`, error)
      }
    }
  }

  private async reportTestExecution(
    report: TestReport,
    planId: string,
    buildId: string
  ): Promise<void> {
    // Extract numeric test case ID from external ID (e.g., "ollama37-8" -> need internal ID)
    // This would need to be mapped from TestLink

    const status = report.pass ? 'p' : 'f' // p=passed, f=failed, b=blocked

    // Note: This uses the TestLink XML-RPC API
    // In practice, you'd use the testlink-mcp or direct API calls
    const payload = {
      devKey: this.apiKey,
      testcaseexternalid: report.testId,
      testplanid: planId,
      buildid: buildId,
      status,
      notes: `${report.reason}\n\nDuration: ${report.duration}ms\n\nLogs:\n${report.logs.substring(0, 4000)}`
    }

    // For now, just log - actual implementation would call TestLink API
    console.log(`  Would report: ${report.testId} = ${status}`)
  }
}
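Putting the pieces together, a sketch of how a CI step might publish results (the function name publish is illustrative, not part of the commit):

```typescript
import { writeFileSync } from 'fs'
import { Reporter } from './reporter.js'
import { TestResult, Judgment } from './types.js'

// Merge execution results with judgments, print a summary,
// and write the JUnit file a CI system can ingest.
function publish(results: TestResult[], judgments: Judgment[]): number {
  const reports = Reporter.createReports(results, judgments)
  Reporter.toConsole(reports)
  writeFileSync('test-results.xml', Reporter.toJUnit(reports))
  return reports.filter(r => !r.pass).length // non-zero should fail the build
}
```

Note that createReports defaults any result without a matching judgment to pass: false, so a silent judge cannot turn into a green build.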
tests/src/types.ts (new file, 66 lines)
@@ -0,0 +1,66 @@
// Test case definition
export interface TestStep {
  name: string
  command: string
  timeout?: number
}

export interface TestCase {
  id: string
  name: string
  suite: string
  priority: number
  timeout: number
  dependencies: string[]
  steps: TestStep[]
  criteria: string
}

// Execution results
export interface StepResult {
  name: string
  command: string
  stdout: string
  stderr: string
  exitCode: number
  duration: number
}

export interface TestResult {
  testCase: TestCase
  steps: StepResult[]
  totalDuration: number
  logs: string
}

// LLM judgment
export interface Judgment {
  testId: string
  pass: boolean
  reason: string
}

// Final report
export interface TestReport {
  testId: string
  name: string
  suite: string
  pass: boolean
  reason: string
  duration: number
  logs: string
}

// Runner options
export interface RunnerOptions {
  suite?: string
  id?: string
  workers: number
  dryRun: boolean
  output: 'console' | 'json' | 'junit'
  reportTestlink: boolean
  ollamaUrl: string
  ollamaModel: string
  testlinkUrl: string
  testlinkApiKey: string
}
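These interfaces mirror the YAML schema of the test cases below; for instance, TC-BUILD-001 deserializes into a TestCase whose steps entries each satisfy TestStep. An abbreviated sketch of that mapping:

```typescript
import { TestCase } from './types.js'

// Excerpt of TC-BUILD-001 as the object yaml.load produces.
const example: TestCase = {
  id: 'TC-BUILD-001',
  name: 'Builder Image Verification',
  suite: 'build',
  priority: 1,
  timeout: 120000,
  dependencies: [],
  steps: [
    {
      name: 'Check image exists',
      command: "docker images ollama37-builder:latest --format '{{.Repository}}:{{.Tag}}'"
    }
  ],
  criteria: 'All commands should succeed (exit code 0).'
}
```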
tests/testcases/build/TC-BUILD-001.yml (new file, 31 lines)
@@ -0,0 +1,31 @@
id: TC-BUILD-001
name: Builder Image Verification
suite: build
priority: 1
timeout: 120000

dependencies: []

steps:
  - name: Check image exists
    command: docker images ollama37-builder:latest --format '{{.Repository}}:{{.Tag}}'

  - name: Verify CUDA toolkit
    command: docker run --rm ollama37-builder:latest nvcc --version

  - name: Verify GCC version
    command: docker run --rm ollama37-builder:latest gcc --version | head -1

  - name: Verify Go version
    command: docker run --rm ollama37-builder:latest go version

criteria: |
  All commands should succeed (exit code 0).

  Expected outputs:
  - Image exists: should show "ollama37-builder:latest"
  - CUDA: should show version 11.4 (accept 11.4.x)
  - GCC: should show version 10 (accept GCC 10.x)
  - Go: should show version 1.25 or higher

  Accept minor version variations. Focus on major versions being correct.
tests/testcases/build/TC-BUILD-002.yml (new file, 27 lines)
@@ -0,0 +1,27 @@
id: TC-BUILD-002
name: Runtime Image Build
suite: build
priority: 2
timeout: 900000

dependencies:
  - TC-BUILD-001

steps:
  - name: Build runtime image
    command: cd docker && make build-runtime-no-cache 2>&1 | tail -50
    timeout: 900000

  - name: Verify runtime image exists
    command: docker images ollama37:latest --format '{{.Repository}}:{{.Tag}} {{.Size}}'

criteria: |
  The runtime Docker image should build successfully from GitHub source.

  Expected:
  - Build completes without fatal errors
  - Final output should mention "successfully" or similar completion message
  - Runtime image "ollama37:latest" should exist after build
  - Image size should be substantial (>10GB is expected due to CUDA)

  Accept build warnings. Only fail on actual build errors.
tests/testcases/build/TC-BUILD-003.yml (new file, 25 lines)
@@ -0,0 +1,25 @@
id: TC-BUILD-003
name: Image Size Validation
suite: build
priority: 3
timeout: 30000

dependencies:
  - TC-BUILD-002

steps:
  - name: Check builder image size
    command: docker images ollama37-builder:latest --format '{{.Size}}'

  - name: Check runtime image size
    command: docker images ollama37:latest --format '{{.Size}}'

criteria: |
  Docker images should be within expected size ranges.

  Expected:
  - Builder image: 10GB to 20GB (contains CUDA, GCC, CMake, Go)
  - Runtime image: 15GB to 25GB (contains builder + compiled ollama)

  These are large images due to the CUDA toolkit and build tools.
  Accept sizes within a reasonable range of expectations.
tests/testcases/inference/TC-INFERENCE-001.yml (new file, 30 lines)
@@ -0,0 +1,30 @@
id: TC-INFERENCE-001
name: Model Pull
suite: inference
priority: 1
timeout: 600000

dependencies:
  - TC-RUNTIME-003

steps:
  - name: Check if model exists
    command: docker exec ollama37 ollama list | grep -q "gemma3:4b" && echo "Model exists" || echo "Model not found"

  - name: Pull model if needed
    command: docker exec ollama37 ollama list | grep -q "gemma3:4b" || docker exec ollama37 ollama pull gemma3:4b
    timeout: 600000

  - name: Verify model available
    command: docker exec ollama37 ollama list

criteria: |
  The gemma3:4b model should be available for inference.

  Expected:
  - Model is either already present or successfully downloaded
  - "ollama list" shows gemma3:4b in the output
  - No download errors

  Accept if the model already exists (skip download).
  Model size is ~3GB; the download may take time.
tests/testcases/inference/TC-INFERENCE-002.yml (new file, 28 lines)
@@ -0,0 +1,28 @@
id: TC-INFERENCE-002
name: Basic Inference
suite: inference
priority: 2
timeout: 180000

dependencies:
  - TC-INFERENCE-001

steps:
  - name: Run simple math question
    command: docker exec ollama37 ollama run gemma3:4b "What is 2+2? Answer with just the number." 2>&1
    timeout: 120000

  - name: Check GPU memory usage
    command: docker exec ollama37 nvidia-smi --query-compute-apps=pid,used_memory --format=csv 2>/dev/null || echo "No GPU processes"

criteria: |
  Basic inference should work on Tesla K80.

  Expected:
  - Model responds to the math question
  - Response should indicate "4" (accept variations: "4", "four", "The answer is 4", etc.)
  - GPU memory should be allocated during inference
  - No CUDA errors in output

  This is AI-generated output - accept reasonable variations.
  Focus on the model producing a coherent response.
tests/testcases/inference/TC-INFERENCE-003.yml (new file, 34 lines)
@@ -0,0 +1,34 @@
id: TC-INFERENCE-003
name: API Endpoint Test
suite: inference
priority: 3
timeout: 120000

dependencies:
  - TC-INFERENCE-001

steps:
  - name: Test generate endpoint (non-streaming)
    command: |
      curl -s http://localhost:11434/api/generate \
        -d '{"model":"gemma3:4b","prompt":"Say hello in one word","stream":false}' \
        | head -c 500

  - name: Test generate endpoint (streaming)
    command: |
      curl -s http://localhost:11434/api/generate \
        -d '{"model":"gemma3:4b","prompt":"Count from 1 to 3","stream":true}' \
        | head -5

criteria: |
  Ollama REST API should handle inference requests.

  Expected for non-streaming:
  - Returns JSON with "response" field
  - Response contains some greeting (hello, hi, etc.)

  Expected for streaming:
  - Returns multiple JSON lines
  - Each line contains partial response

  Accept any valid JSON response. Content may vary.
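The same non-streaming request the curl step makes can also be exercised from the test runner's own language; a minimal sketch, assuming Node 18+ (global fetch) and a reachable server:

```typescript
// Hypothetical standalone check of the /api/generate endpoint.
const res = await fetch('http://localhost:11434/api/generate', {
  method: 'POST',
  body: JSON.stringify({ model: 'gemma3:4b', prompt: 'Say hello in one word', stream: false })
})
const data = await res.json()
console.log(data.response) // e.g. "Hello" - content may vary
```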
tests/testcases/inference/TC-INFERENCE-004.yml (new file, 32 lines)
@@ -0,0 +1,32 @@
id: TC-INFERENCE-004
name: CUBLAS Fallback Verification
suite: inference
priority: 4
timeout: 120000

dependencies:
  - TC-INFERENCE-002

steps:
  - name: Check for CUBLAS errors in logs
    command: cd docker && docker compose logs 2>&1 | grep -i "CUBLAS_STATUS" | grep -v "SUCCESS" | head -10 || echo "No CUBLAS errors"

  - name: Check compute capability detection
    command: cd docker && docker compose logs 2>&1 | grep -iE "compute|capability|cc.*3" | head -10 || echo "No compute capability logs"

  - name: Verify no GPU errors
    command: cd docker && docker compose logs 2>&1 | grep -iE "error|fail" | grep -i gpu | head -10 || echo "No GPU errors"

criteria: |
  CUBLAS should work correctly on Tesla K80 using legacy fallback.

  Expected:
  - No CUBLAS_STATUS_ARCH_MISMATCH errors
  - No CUBLAS_STATUS_NOT_SUPPORTED errors
  - Compute capability 3.7 may be mentioned in debug logs
  - No fatal GPU-related errors

  The K80 uses legacy CUBLAS functions (cublasSgemmBatched)
  instead of modern Ex variants. This should work transparently.

  Accept warnings. Only fail on actual CUBLAS errors.
tests/testcases/runtime/TC-RUNTIME-001.yml (new file, 31 lines)
@@ -0,0 +1,31 @@
id: TC-RUNTIME-001
name: Container Startup
suite: runtime
priority: 1
timeout: 120000

dependencies:
  - TC-BUILD-002

steps:
  - name: Stop existing container
    command: cd docker && docker compose down 2>/dev/null || true

  - name: Start container with GPU
    command: cd docker && docker compose up -d

  - name: Wait for startup
    command: sleep 15

  - name: Check container status
    command: cd docker && docker compose ps

criteria: |
  The ollama37 container should start successfully with GPU access.

  Expected:
  - Container starts without errors
  - docker compose ps shows container in "Up" state
  - No "Exited" or "Restarting" status

  Accept startup warnings. Container should be running.
tests/testcases/runtime/TC-RUNTIME-002.yml (new file, 29 lines)
@@ -0,0 +1,29 @@
id: TC-RUNTIME-002
name: GPU Detection
suite: runtime
priority: 2
timeout: 60000

dependencies:
  - TC-RUNTIME-001

steps:
  - name: Check nvidia-smi inside container
    command: docker exec ollama37 nvidia-smi

  - name: Check CUDA libraries
    command: docker exec ollama37 ldconfig -p | grep -i cuda | head -5

  - name: Check Ollama GPU detection
    command: cd docker && docker compose logs 2>&1 | grep -i gpu | head -10

criteria: |
  Tesla K80 GPU should be detected inside the container.

  Expected:
  - nvidia-smi shows Tesla K80 GPU(s)
  - Driver version 470.x (or compatible)
  - CUDA libraries are available (libcuda, libcublas, etc.)
  - Ollama logs mention GPU detection

  The K80 has 12GB VRAM per GPU. Accept variations in reported memory.
tests/testcases/runtime/TC-RUNTIME-003.yml (new file, 39 lines)
@@ -0,0 +1,39 @@
id: TC-RUNTIME-003
name: Health Check
suite: runtime
priority: 3
timeout: 180000

dependencies:
  - TC-RUNTIME-001

steps:
  - name: Wait for health check
    command: |
      for i in {1..30}; do
        STATUS=$(docker inspect ollama37 --format='{{.State.Health.Status}}' 2>/dev/null || echo "starting")
        echo "Health status: $STATUS (attempt $i/30)"
        if [ "$STATUS" = "healthy" ]; then
          echo "Container is healthy"
          exit 0
        fi
        sleep 5
      done
      echo "Health check timeout"
      exit 1

  - name: Test API endpoint
    command: curl -s http://localhost:11434/api/tags

  - name: Check Ollama version
    command: docker exec ollama37 ollama --version

criteria: |
  Ollama server should be healthy and API responsive.

  Expected:
  - Container health status becomes "healthy"
  - /api/tags endpoint returns JSON response (even if empty models)
  - ollama --version shows version information

  Accept any valid JSON response from API. Version format may vary.
tests/tsconfig.json (new file, 16 lines)
@@ -0,0 +1,16 @@
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "node",
    "esModuleInterop": true,
    "strict": true,
    "outDir": "dist",
    "rootDir": "src",
    "declaration": true,
    "skipLibCheck": true,
    "resolveJsonModule": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}