-
Notifications
You must be signed in to change notification settings - Fork 14
172 lines (152 loc) · 5.17 KB
/
skills-ci-example.yml
File metadata and controls
172 lines (152 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
name: Skills CI Example - Waza Evaluation
# Template workflow for microsoft/skills repositories.
# Copy this file into your skill repository's .github/workflows/ directory
# and customize it for your skill. If you rename the file, also update the
# matching entry in the pull_request path filter below, which lists this
# workflow by its file name.
#
# This example shows how to:
#   1. Install waza from source
#   2. Run evaluations with the mock executor (no API keys needed); the
#      executor is selected in the eval YAML, not by this workflow
#   3. Upload results as artifacts
#   4. Use waza's exit codes for CI pass/fail
#      (0=success, 1=test failure, 2=config error)
on:
  # Pull requests targeting main that touch the skill definition, its
  # evaluation suite, or this workflow file itself.
  pull_request:
    branches:
      - main
    paths:
      - 'SKILL.md'
      - 'eval/**'
      - '.github/workflows/skills-ci-example.yml'

  # Pushes to main that touch the skill definition or its evaluation suite.
  push:
    branches:
      - main
    paths:
      - 'SKILL.md'
      - 'eval/**'

  # Manual runs, optionally pointing at a different evaluation file.
  workflow_dispatch:
    inputs:
      eval-yaml:
        description: 'Path to evaluation YAML file'
        type: string
        required: false
        default: 'eval/eval.yaml'
# Workflow-wide token scope: read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Installs waza, runs the skill's evaluation file, and publishes the results
  # as an artifact and a job summary. The job fails when waza exits non-zero.
  evaluate-skill:
    name: Evaluate Skill with Waza
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Setup Go Environment
        uses: actions/setup-go@v5
        with:
          # Waza requires Go 1.26+
          go-version: '1.26'

      # Option 1: Install from source (recommended for CI)
      # NOTE: @latest tracks the newest release; pin a version tag instead if
      # you need reproducible runs.
      - name: Install Waza from Source
        run: |
          go install github.com/microsoft/waza/cmd/waza@latest
          waza --version

      # Option 2: Build from Dockerfile (alternative)
      # Uncomment this block if you prefer Docker-based builds
      # - name: Build Waza Docker Image
      #   run: |
      #     docker build -t waza:local .
      #     docker run waza:local --version

      - name: Determine Eval File
        id: eval-file
        env:
          # Hand the workflow input to the script through the environment
          # instead of expanding the expression inside the script text, so the
          # value is treated as data and can never be parsed as shell code.
          EVAL_YAML_INPUT: ${{ inputs.eval-yaml }}
        run: |
          # Use the workflow input if provided. It is empty for pull_request
          # and push events, in which case fall back to the default location
          # for skill evals.
          EVAL_FILE="${EVAL_YAML_INPUT:-eval/eval.yaml}"

          # Verify file exists
          if [ ! -f "$EVAL_FILE" ]; then
            echo "::error::Evaluation file not found: $EVAL_FILE"
            echo "Expected structure:"
            echo "  your-skill/"
            echo "  ├── SKILL.md"
            echo "  └── eval/"
            echo "      ├── eval.yaml"
            echo "      ├── tasks/"
            echo "      └── fixtures/"
            exit 1
          fi

          echo "eval-file=$EVAL_FILE" >> "$GITHUB_OUTPUT"
          echo "Using eval file: $EVAL_FILE"

      - name: Run Waza Evaluation
        id: run-eval
        env:
          # Same reasoning as above: the step output originates from a
          # user-supplied input, so pass it as an environment variable.
          EVAL_FILE: ${{ steps.eval-file.outputs.eval-file }}
        run: |
          # The executor is chosen in the eval YAML, not on this command line.
          # The mock executor simulates agent behavior for testing and needs
          # no API keys.
          # Exit codes: 0=success, 1=test failure, 2=config error
          # A non-zero exit fails this step and therefore the job.
          waza run "$EVAL_FILE" \
            --verbose \
            --output results.json

      # Runs even when the evaluation failed so the results can be inspected.
      - name: Upload Results Artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: waza-evaluation-results
          path: |
            results.json
            transcripts/
          retention-days: 30
          if-no-files-found: warn

      - name: Display Results Summary
        if: always()
        run: |
          # Show the first 50 lines of the results in the job summary.
          if [ -f results.json ]; then
            {
              echo "## Evaluation Results"
              echo '```json'
              head -50 results.json
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Check Evaluation Status
        # failure() is required here: without a status check function the
        # condition is implicitly combined with success(), so this step would
        # be skipped exactly when the evaluation step failed.
        if: failure() && steps.run-eval.outcome == 'failure'
        run: |
          echo "::error::Waza evaluation failed. Check the results artifact for details."
          exit 1
# Optional: Test with Copilot SDK executor (requires GITHUB_TOKEN)
# Uncomment this job if you want to test with actual AI models.
# Removing the leading "# " from each line below leaves the job correctly
# nested under `jobs:`.
#   evaluate-with-copilot:
#     name: Evaluate with Copilot SDK
#     runs-on: ubuntu-latest
#     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
#
#     steps:
#       - name: Checkout Repository
#         uses: actions/checkout@v4
#
#       - name: Setup Go Environment
#         uses: actions/setup-go@v5
#         with:
#           go-version: '1.26'
#
#       - name: Install Waza
#         run: |
#           go install github.com/microsoft/waza/cmd/waza@latest
#           waza --version
#
#       - name: Run Evaluation with Copilot
#         env:
#           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
#         run: |
#           # Update eval.yaml to use copilot-sdk executor
#           # Then run evaluation
#           waza run eval/eval.yaml --verbose --output results-copilot.json
#
#       - name: Upload Copilot Results
#         if: always()
#         uses: actions/upload-artifact@v4
#         with:
#           name: waza-copilot-results
#           path: results-copilot.json