#!/bin/bash
#
# run-tests.sh - Execute test workflow and capture results
#
# Usage: ./run-tests.sh <path/to/skill-directory>
#
# This script guides you through testing each test case manually and records results.
#

# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Colors for output.
# The values hold a literal backslash sequence; they only turn into terminal
# escapes when printed with `echo -e`.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m' # No Color

# Parse arguments
# Skill directory taken from the command line; empty until one has been parsed.
SKILL_DIR=""

#######################################
# Print the usage text and terminate the script.
# Arguments: $1 - exit status (optional, defaults to 1)
# Outputs:   usage text on stdout
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  local status="${1:-1}"

  echo "Usage: $0 <path/to/skill-directory>"
  echo ""
  echo "Examples:"
  echo " $0 ~/.config/opencode/skills/my-skill"
  echo " $0 ./my-skill"
  exit "$status"
}

# Parse command line arguments
#######################################
# Walk the positional parameters and record the skill directory.
# Globals:   SKILL_DIR (read and written), RED, NC (read)
# Arguments: the script's positional parameters
# Exits through usage() on --help, on an unknown option, and when a second
# directory is supplied.
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        # Help was asked for explicitly, so pass 0: a usage() that honors a
        # status argument then exits successfully instead of reporting failure.
        usage 0
        ;;
      -*)
        echo -e "${RED}Error: Unknown option $1${NC}"
        usage
        ;;
      *)
        if [[ -n "$SKILL_DIR" ]]; then
          echo -e "${RED}Error: Multiple skill directories provided${NC}"
          usage
        fi
        SKILL_DIR="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"

# Validate skill directory
if [[ -z "$SKILL_DIR" ]]; then
  echo -e "${RED}Error: Skill directory is required${NC}"
  usage
fi

# Resolve path
# The absolute path is captured in a scratch variable first. Assigning the
# command substitution straight to SKILL_DIR would overwrite it with an empty
# string when cd fails, and the error message would then show no path at all.
# CDPATH is cleared for the cd so a relative name cannot be resolved against
# it (which would also make cd print the directory ahead of pwd's output).
if ! resolved_dir="$(CDPATH='' cd -- "$SKILL_DIR" 2>/dev/null && pwd)"; then
  echo -e "${RED}Error: Cannot access directory: $SKILL_DIR${NC}"
  exit 1
fi
SKILL_DIR="$resolved_dir"
unset resolved_dir

# Get skill name from directory
SKILL_NAME="$(basename "$SKILL_DIR")"

# Verify evals.json exists
if [[ ! -f "$SKILL_DIR/evals/evals.json" ]]; then
  echo -e "${RED}Error: evals/evals.json not found${NC}"
  echo ""
  echo "Create test cases first:"
  echo " ~/.config/opencode/skills/skill-builder/scripts/create-tests.sh $SKILL_DIR"
  exit 1
fi

echo "========================================"
echo "Running Tests for: $SKILL_NAME"
echo "========================================"
echo ""

# Check if jq is available
# The outcome is stored in USE_JQ as the string "true" or "false"; later code
# compares against that string to choose between jq and the reduced fallback.
if command -v jq > /dev/null 2>&1; then
  USE_JQ=true
else
  echo -e "${YELLOW}⚠ jq is not installed${NC}"
  echo " Install jq for better JSON handling:"
  echo " Ubuntu/Debian: sudo apt-get install jq"
  echo " macOS: brew install jq"
  echo ""
  echo -e "${YELLOW}Falling back to basic parsing...${NC}"
  USE_JQ=false
fi

# Initialize results array
RESULTS=()   # one serialized JSON object per recorded test
TEST_COUNT=0 # number of test cases found in evals.json
PASSED=0
FAILED=0
SKIPPED=0

#######################################
# Extract a value from a JSON file, with jq when available and a grep/sed
# approximation otherwise.
# Globals:   USE_JQ (read)
# Arguments: $1 - JSON file
#            $2 - key; with jq it is spliced into the filter after a dot
#            $3 - optional selector for one entry under "evals". With jq it is
#                 used as the array index; the fallback instead looks for an
#                 entry containing `"id": <selector>`.
# Outputs:   the value on stdout. With jq a missing key prints "null"; the
#            fallback prints nothing and only understands string values that
#            sit on a single line.
# Returns:   0, including when the key is not found
#######################################
get_json_value() {
  local file="$1"
  local key="$2"
  local index="${3:-}"

  if [[ "$USE_JQ" == true ]]; then
    if [[ -n "$index" ]]; then
      jq -r ".evals[$index].$key" "$file" 2>/dev/null || echo ""
    else
      jq -r ".$key" "$file" 2>/dev/null || echo ""
    fi
    return 0
  fi

  # Basic grep-based extraction (fallback).
  # The sed program is built inside double quotes so a key containing spaces
  # or glob characters stays one argument. The key is still used as a regex.
  local extract="s/.*\"$key\": \"\(.*\)\".*/\1/"

  # `|| true`: with pipefail a grep that matches nothing fails the pipeline;
  # a missing key is reported as empty output, as in the jq branch.
  if [[ -n "$index" ]]; then
    # This is a simplified fallback - won't handle nested structures well
    grep -A 100 '"evals":' "$file" \
      | grep -A 20 "\"id\": $index" \
      | grep "\"$key\":" \
      | head -1 \
      | sed "$extract" \
      | sed 's/",*$//' \
      || true
  else
    grep "\"$key\":" "$file" \
      | head -1 \
      | sed "$extract" \
      | sed 's/",*$//' \
      || true
  fi
}

# Count test cases
if [[ "$USE_JQ" == true ]]; then
  TEST_COUNT=$(jq '.evals | length' "$SKILL_DIR/evals/evals.json")
else
  # grep -c always prints the count - "0" included - and signals "no match"
  # only through its exit status. Chaining `|| echo "0"` would therefore
  # capture "0" twice (two lines) and break the numeric test below. Ignore the
  # status and fall back to 0 only when grep printed nothing at all.
  TEST_COUNT=$(grep -c '"id":' "$SKILL_DIR/evals/evals.json" 2>/dev/null || true)
  TEST_COUNT="${TEST_COUNT:-0}"
fi

if [[ "$TEST_COUNT" -eq 0 ]]; then
  echo -e "${RED}Error: No test cases found in evals.json${NC}"
  exit 1
fi

echo "Found $TEST_COUNT test case(s)"
echo ""

#######################################
# Escape text for use inside a double-quoted JSON string.
# Covers backslash, double quote, newline, carriage return and tab; any other
# control character is passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout, without a trailing newline
#######################################
json_escape() {
  local text="$1"
  text=${text//\\/\\\\} # backslashes first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Translate a one-character reply into an outcome and count it.
# Globals:   TEST_RESULT (written); PASSED, FAILED, SKIPPED (incremented)
# Arguments: $1 - reply typed by the user
# Returns:   0 when the reply was p/f/s (either case), 1 otherwise
#######################################
tally_result() {
  # Counters use plain assignment: `((VAR++))` evaluates to the old value, so
  # it returns status 1 while the counter is still 0 and `set -e` would end
  # the script on the very first recorded result.
  case "$1" in
    [Pp])
      TEST_RESULT="pass"
      PASSED=$((PASSED + 1))
      ;;
    [Ff])
      TEST_RESULT="fail"
      FAILED=$((FAILED + 1))
      ;;
    [Ss])
      TEST_RESULT="skip"
      SKIPPED=$((SKIPPED + 1))
      ;;
    *)
      return 1
      ;;
  esac
}

#######################################
# Serialize one test outcome as a JSON object.
# Arguments: $1 - test id, already valid JSON (number, quoted string or null)
#            $2 - test name (raw text)
#            $3 - result: pass, fail or skip
#            $4 - free-form notes (raw text)
#            $5 - timestamp
# Outputs:   the JSON object on stdout
#######################################
build_result_entry() {
  printf '{\n  "test_id": %s,\n  "test_name": "%s",\n  "result": "%s",\n  "notes": "%s",\n  "timestamp": "%s"\n}' \
    "$1" "$(json_escape "$2")" "$3" "$(json_escape "$4")" "$5"
}

# Process each test case
EVALS_FILE="$SKILL_DIR/evals/evals.json"

for ((i = 0; i < TEST_COUNT; i++)); do
  echo "========================================"
  echo -e "${CYAN}Test Case $((i+1)) of $TEST_COUNT${NC}"
  echo "========================================"
  echo ""

  # Extract test details
  if [[ "$USE_JQ" == true ]]; then
    # -c instead of -r keeps the id JSON-encoded (numbers bare, strings
    # quoted, a missing id as null), so it can be written out unchanged.
    TEST_ID=$(jq -c ".evals[$i].id" "$EVALS_FILE")
    TEST_NAME=$(jq -r ".evals[$i].name" "$EVALS_FILE")
    TEST_PROMPT=$(jq -r ".evals[$i].prompt" "$EVALS_FILE")
    TEST_EXPECTED=$(jq -r ".evals[$i].expected_output" "$EVALS_FILE")
    TEST_TYPE=$(jq -r ".evals[$i].type // .evals[$i].name" "$EVALS_FILE")
  else
    # Fallback parsing
    TEST_ID=$((i+1))
    TEST_NAME="test-$((i+1))"
    TEST_PROMPT="[Prompt extraction requires jq - install jq for full functionality]"
    TEST_EXPECTED="[Expected output extraction requires jq]"
    TEST_TYPE="unknown"
  fi

  echo -e "${BLUE}Test Name:${NC} $TEST_NAME"
  echo -e "${BLUE}Type:${NC} $TEST_TYPE"
  echo ""
  echo -e "${BLUE}Prompt:${NC}"
  echo "----------------------------------------"
  echo "$TEST_PROMPT"
  echo "----------------------------------------"
  echo ""

  echo -e "${BLUE}Expected Output:${NC}"
  echo "$TEST_EXPECTED"
  echo ""

  # Instructions for manual testing
  echo -e "${YELLOW}Instructions:${NC}"
  echo "1. Copy the prompt above"
  echo "2. Open a new opencode session"
  echo "3. Paste the prompt and observe the result"
  echo "4. Return here to record the outcome"
  echo ""

  read -r -p "Press Enter when ready to record results..."
  echo ""

  # Get test result
  echo "Test Result:"
  echo " [p] Pass - Output met expectations"
  echo " [f] Fail - Output did not meet expectations"
  echo " [s] Skip - Could not test or not applicable"
  echo ""

  # Keep asking until the reply maps to an outcome.
  while true; do
    read -p "Result (p/f/s): " -n 1 -r
    echo
    if tally_result "$REPLY"; then
      break
    fi
    echo "Please enter p, f, or s"
  done

  echo ""

  # Get notes
  echo -e "${BLUE}Notes (optional):${NC}"
  echo "Describe any issues, observations, or suggestions:"
  read -r TEST_NOTES

  # Get timestamp
  TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # Build result entry; name and notes are escaped so quotes or backslashes
  # in them cannot corrupt the results file.
  RESULT_ENTRY=$(build_result_entry \
    "$TEST_ID" "$TEST_NAME" "$TEST_RESULT" "$TEST_NOTES" "$TIMESTAMP")

  RESULTS+=("$RESULT_ENTRY")

  echo ""
  echo -e "${GREEN}✓ Recorded test result${NC}"
  echo ""

  # Ask to continue or stop (not after the last test case)
  if (( i < TEST_COUNT - 1 )); then
    read -p "Continue to next test? (Y/n): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Nn]$ ]]; then
      echo "Stopping test run..."
      break
    fi
    echo ""
  fi
done

#######################################
# Join already-serialized JSON values into a single JSON array.
# Arguments: zero or more JSON values
# Outputs:   the array on stdout ("[]" when called without arguments)
#######################################
join_json_array() {
  local out="["
  local sep=""
  local item
  for item in "$@"; do
    out+="${sep}${item}"
    sep=","
  done
  printf '%s]' "$out"
}

#######################################
# Render the complete results document.
# Globals:   SKILL_NAME, RUN_TIMESTAMP, RESULTS, RESULTS_JSON, PASSED, FAILED,
#            SKIPPED (all read)
# Outputs:   the JSON document on stdout
#######################################
render_results_json() {
  # The skill name comes from a directory name, so escape the two characters
  # that would end or corrupt a JSON string.
  local name=${SKILL_NAME//\\/\\\\}
  name=${name//\"/\\\"}

  cat << EOF
{
  "skill_name": "$name",
  "run_timestamp": "$RUN_TIMESTAMP",
  "summary": {
    "total": ${#RESULTS[@]},
    "passed": $PASSED,
    "failed": $FAILED,
    "skipped": $SKIPPED
  },
  "results": $RESULTS_JSON
}
EOF
}

# Generate test-results.json
echo "========================================"
echo "Generating Test Results"
echo "========================================"
echo ""

# Create results JSON
RUN_TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Build JSON array from results.
# The ${RESULTS[@]+...} form expands to nothing for an empty array instead of
# tripping `set -u` on older bash releases.
RESULTS_JSON=$(join_json_array ${RESULTS[@]+"${RESULTS[@]}"})

render_results_json > "$SKILL_DIR/evals/test-results.json"

echo -e "${GREEN}✓ Saved results to evals/test-results.json${NC}"
echo ""

#######################################
# Print the run totals and, when at least one test failed, follow-up hints.
# Globals:   RESULTS, PASSED, FAILED, SKIPPED, SKILL_DIR, GREEN, RED, YELLOW,
#            NC (all read)
# Outputs:   the summary on stdout
#######################################
print_summary() {
  # Display summary
  echo "========================================"
  echo "Test Run Summary"
  echo "========================================"
  echo ""
  echo "Total Tests: ${#RESULTS[@]}"
  echo -e "${GREEN}Passed: $PASSED${NC}"
  echo -e "${RED}Failed: $FAILED${NC}"
  echo -e "${YELLOW}Skipped: $SKIPPED${NC}"
  echo ""

  # Next steps
  if [[ $FAILED -gt 0 ]]; then
    echo "========================================"
    echo -e "${YELLOW}Next Steps:${NC}"
    echo "========================================"
    echo ""
    echo "Some tests failed. To improve your skill:"
    echo ""
    echo "1. Review test-results.json for details"
    echo "2. Update SKILL.md to address issues"
    echo "3. Run tests again:"
    # %q shell-quotes the paths so the suggested commands can be pasted back
    # even when a path contains spaces; ordinary paths print unchanged.
    printf ' %q %q\n' "$0" "$SKILL_DIR"
    echo ""
    echo "For detailed grading:"
    printf ' ~/.config/opencode/skills/skill-builder/scripts/grade-output.sh %q\n' "$SKILL_DIR"
    echo ""
  fi
}

print_summary

echo -e "${GREEN}Done!${NC}"