diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 33c66991..6ce4483c 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -29,7 +29,10 @@ jobs:
         id: build-tests
         run: |
           cd tests
-          npm run dev -- run --suite build --no-llm --output json > /tmp/build-results.json 2>&1 || true
+          # Only stdout (the JSON results) is redirected to the file; progress is written to stderr and stays visible in the job log
+          npm run --silent dev -- run --suite build --no-llm --output json > /tmp/build-results.json || true
+
+          echo "--- JSON Results ---"
           cat /tmp/build-results.json
 
           # Check if any tests failed
@@ -73,7 +76,10 @@ jobs:
         id: runtime-tests
         run: |
           cd tests
-          npm run dev -- run --suite runtime --no-llm --output json > /tmp/runtime-results.json 2>&1 || true
+          # Only stdout (the JSON results) is redirected to the file; progress is written to stderr and stays visible in the job log
+          npm run --silent dev -- run --suite runtime --no-llm --output json > /tmp/runtime-results.json || true
+
+          echo "--- JSON Results ---"
           cat /tmp/runtime-results.json
 
       - name: Upload runtime results
@@ -104,7 +110,10 @@ jobs:
         id: inference-tests
         run: |
           cd tests
-          npm run dev -- run --suite inference --no-llm --output json > /tmp/inference-results.json 2>&1 || true
+          # Only stdout (the JSON results) is redirected to the file; progress is written to stderr and stays visible in the job log
+          npm run --silent dev -- run --suite inference --no-llm --output json > /tmp/inference-results.json || true
+
+          echo "--- JSON Results ---"
           cat /tmp/inference-results.json
 
       - name: Upload inference results
@@ -143,7 +152,10 @@ jobs:
           echo "Running LLM judge evaluation..."
 
           # Re-run all tests with LLM judge using local Ollama
-          npm run dev -- run --output json > /tmp/llm-judged-results.json 2>&1 || true
+          # Only stdout (the JSON results) is redirected to the file; progress is written to stderr and stays visible in the job log
+          npm run --silent dev -- run --output json > /tmp/llm-judged-results.json || true
+
+          echo "--- JSON Results ---"
           cat /tmp/llm-judged-results.json
 
       - name: Upload final results
diff --git a/tests/src/cli.ts b/tests/src/cli.ts
index 85463545..fb4e3256 100644
--- a/tests/src/cli.ts
+++ b/tests/src/cli.ts
@@ -13,6 +13,9 @@ import { RunnerOptions } from './types.js'
 const __dirname = path.dirname(fileURLToPath(import.meta.url))
 const defaultTestcasesDir = path.join(__dirname, '..', 'testcases')
 
+// Writes msg plus a trailing newline to stderr, so progress text stays out of stdout (which carries the report for --output json)
+const log = (msg: string) => process.stderr.write(msg + '\n')
+
 const program = new Command()
 
 program
@@ -36,66 +39,65 @@ program
   .option('--no-llm', 'Skip LLM judging, use simple exit code check')
   .option('--testcases-dir <dir>', 'Test cases directory', defaultTestcasesDir)
   .action(async (options) => {
-    console.log('='.repeat(60))
-    console.log('OLLAMA37 TEST RUNNER')
-    console.log('='.repeat(60))
+    log('='.repeat(60))
+    log('OLLAMA37 TEST RUNNER')
+    log('='.repeat(60))
 
     const loader = new TestLoader(options.testcasesDir)
     const executor = new TestExecutor(path.join(__dirname, '..', '..'))
     const judge = new LLMJudge(options.ollamaUrl, options.ollamaModel)
 
     // Load test cases
-    console.log('\nLoading test cases...')
+    log('\nLoading test cases...')
     let testCases = await loader.loadAll()
 
     if (options.suite) {
       testCases = testCases.filter(tc => tc.suite === options.suite)
-      console.log(`  Filtered by suite: ${options.suite}`)
+      log(`  Filtered by suite: ${options.suite}`)
     }
 
     if (options.id) {
       testCases = testCases.filter(tc => tc.id === options.id)
-      console.log(`  Filtered by ID: ${options.id}`)
+      log(`  Filtered by ID: ${options.id}`)
     }
 
     // Sort by dependencies
     testCases = loader.sortByDependencies(testCases)
 
-    console.log(`  Found ${testCases.length} test cases`)
+    log(`  Found ${testCases.length} test cases`)
 
     if (testCases.length === 0) {
-      console.log('\nNo test cases found!')
+      log('\nNo test cases found!')
       process.exit(1)
     }
 
     // Dry run
     if (options.dryRun) {
-      console.log('\nDRY RUN - Would execute:')
+      log('\nDRY RUN - Would execute:')
       for (const tc of testCases) {
-        console.log(`  ${tc.id}: ${tc.name}`)
+        log(`  ${tc.id}: ${tc.name}`)
         for (const step of tc.steps) {
-          console.log(`    - ${step.name}: ${step.command}`)
+          log(`    - ${step.name}: ${step.command}`)
         }
       }
       process.exit(0)
     }
 
-    // Execute tests
-    console.log('\nExecuting tests...')
+    // Execute tests (progress goes to stderr via executor)
     const workers = parseInt(options.workers)
     const results = await executor.executeAll(testCases, workers)
 
     // Judge results
-    console.log('\nJudging results...')
+    log('\nJudging results...')
     let judgments
     if (options.llm === false) {
-      console.log('  Using simple exit code check (--no-llm)')
+      log('  Using simple exit code check (--no-llm)')
       judgments = results.map(r => judge.simpleJudge(r))
     } else {
       try {
         judgments = await judge.judgeResults(results)
       } catch (error) {
-        console.error('  LLM judging failed, falling back to simple check:', error)
+        log(`  LLM judging failed, falling back to simple check: ${error}`)
         judgments = results.map(r => judge.simpleJudge(r))
       }
     }
@@ -107,15 +109,14 @@ program
     switch (options.output) {
       case 'json':
         const json = Reporter.toJSON(reports)
-        console.log(json)
-        writeFileSync('test-results.json', json)
-        console.log('\nResults written to test-results.json')
+        // JSON goes to stdout (can be redirected to file)
+        process.stdout.write(json + '\n')
         break
 
       case 'junit':
         const junit = Reporter.toJUnit(reports)
         writeFileSync('test-results.xml', junit)
-        console.log('\nResults written to test-results.xml')
+        log('\nResults written to test-results.xml')
         break
 
       case 'console':
@@ -124,6 +125,13 @@ program
         break
     }
 
+    // Summary
+    const passed = reports.filter(r => r.pass).length
+    const failed = reports.filter(r => !r.pass).length
+    log('\n' + '='.repeat(60))
+    log(`SUMMARY: ${passed} passed, ${failed} failed, ${reports.length} total`)
+    log('='.repeat(60))
+
     // Report to TestLink
     if (options.reportTestlink && options.testlinkApiKey) {
       const testlinkReporter = new TestLinkReporter(
@@ -132,11 +140,10 @@ program
       )
       // Would need plan ID and build ID
       // await testlinkReporter.reportResults(reports, planId, buildId)
-      console.log('\nTestLink reporting not yet implemented')
+      log('\nTestLink reporting not yet implemented')
     }
 
     // Exit with appropriate code
-    const failed = reports.filter(r => !r.pass).length
     process.exit(failed > 0 ? 1 : 0)
   })
 
diff --git a/tests/src/executor.ts b/tests/src/executor.ts
index fbaa46af..6b5bd440 100644
--- a/tests/src/executor.ts
+++ b/tests/src/executor.ts
@@ -6,11 +6,18 @@ const execAsync = promisify(exec)
 
 export class TestExecutor {
   private workingDir: string
+  private totalTests: number = 0
+  private currentTest: number = 0
 
   constructor(workingDir: string = process.cwd()) {
     this.workingDir = workingDir
   }
 
+  // Writes msg plus a trailing newline to stderr, keeping progress text out of stdout
+  private progress(msg: string): void {
+    process.stderr.write(msg + '\n')
+  }
+
   async executeStep(command: string, timeout: number): Promise<StepResult> {
     const startTime = Date.now()
     let stdout = ''
@@ -47,11 +54,17 @@ export class TestExecutor {
   async executeTestCase(testCase: TestCase): Promise<TestResult> {
     const startTime = Date.now()
     const stepResults: StepResult[] = []
+    const timestamp = new Date().toISOString().substring(11, 19)
 
-    console.log(`  Executing: ${testCase.id} - ${testCase.name}`)
+    this.currentTest++
+    this.progress(`[${timestamp}] [${this.currentTest}/${this.totalTests}] ${testCase.id}: ${testCase.name}`)
 
-    for (const step of testCase.steps) {
-      console.log(`    Step: ${step.name}`)
+    for (let i = 0; i < testCase.steps.length; i++) {
+      const step = testCase.steps[i]
+      const stepTimestamp = new Date().toISOString().substring(11, 19)
+
+      this.progress(`  [${stepTimestamp}] Step ${i + 1}/${testCase.steps.length}: ${step.name}`)
+      this.progress(`    Command: ${step.command.substring(0, 80)}${step.command.length > 80 ? '...' : ''}`)
 
       const timeout = step.timeout || testCase.timeout
       const result = await this.executeStep(step.command, timeout)
@@ -59,11 +72,15 @@ export class TestExecutor {
 
       stepResults.push(result)
 
-      // Log step result
-      if (result.exitCode === 0) {
-        console.log(`      Exit: ${result.exitCode} (${result.duration}ms)`)
-      } else {
-        console.log(`      Exit: ${result.exitCode} (FAILED, ${result.duration}ms)`)
+      // Log step result with status indicator
+      const status = result.exitCode === 0 ? '✓' : '✗'
+      const duration = `${(result.duration / 1000).toFixed(1)}s`
+      this.progress(`    ${status} Exit: ${result.exitCode} (${duration})`)
+
+      // Show brief error output if failed
+      if (result.exitCode !== 0 && result.stderr) {
+        const errorPreview = result.stderr.split('\n')[0].substring(0, 100)
+        this.progress(`    Error: ${errorPreview}`)
       }
     }
 
@@ -95,6 +112,14 @@ ${r.stderr || '(empty)'}
   async executeAll(testCases: TestCase[], concurrency: number = 1): Promise<TestResult[]> {
     const results: TestResult[] = []
 
+    // Set total for progress tracking
+    this.totalTests = testCases.length
+    this.currentTest = 0
+
+    const startTimestamp = new Date().toISOString().substring(11, 19)
+    this.progress(`\n[${startTimestamp}] Starting ${this.totalTests} test(s)...`)
+    this.progress('─'.repeat(60))
+
     if (concurrency === 1) {
       // Sequential execution
       for (const tc of testCases) {
@@ -114,6 +139,11 @@ ${r.stderr || '(empty)'}
       results.push(...parallelResults)
     }
 
+    // Summary
+    const endTimestamp = new Date().toISOString().substring(11, 19)
+    this.progress('─'.repeat(60))
+    this.progress(`[${endTimestamp}] Execution complete: ${results.length} test(s)`)
+
     return results
   }
 }