.dotfiles/.config/opencode/skills/.skill-builder.disabled/scripts/grade-output.sh

#!/bin/bash
#
# grade-output.sh - Interactive grading checklist for skill outputs
#
# Usage: ./grade-output.sh <path/to/skill-directory>
#
# This script provides a structured checklist for evaluating skill test outputs.
#

# Strict mode: stop on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI color sequences. They are kept as literal backslash text and are only
# turned into real escape codes when printed with `echo -e`.
NC='\033[0m' # No Color (reset)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'

# Skill directory taken from the command line; empty until arguments are parsed
SKILL_DIR=""

# Print the usage text (synopsis plus two example invocations) to stdout and
# terminate the script with exit status 1. Never returns to the caller.
usage() {
    printf '%s\n' \
        "Usage: $0 <path/to/skill-directory>" \
        "" \
        "Examples:" \
        "  $0 ~/.config/opencode/skills/my-skill" \
        "  $0 ./my-skill"
    exit 1
}

# Parse command line arguments.
# Exactly one positional argument (the skill directory) is accepted.
# Every branch either consumes the argument with `shift` or ends the script
# through usage(), so the loop always makes progress.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            usage
            ;;
        -*)
            echo -e "${RED}Error: Unknown option $1${NC}" >&2
            usage
            ;;
        *)
            if [[ -z "$SKILL_DIR" ]]; then
                SKILL_DIR="$1"
            else
                echo -e "${RED}Error: Multiple skill directories provided${NC}" >&2
                usage
            fi
            shift
            ;;
    esac
done

# Validate skill directory
if [[ -z "$SKILL_DIR" ]]; then
    echo -e "${RED}Error: Skill directory is required${NC}" >&2
    usage
fi

# Resolve to an absolute path.
# The result goes into a temporary variable first: assigning straight to
# SKILL_DIR would blank it when `cd` fails, and the error message below would
# then lose the path the user typed.
# CDPATH is cleared for the `cd` so a relative name cannot be looked up in
# (and echoed from) some unrelated directory listed in CDPATH.
resolved_skill_dir="$(CDPATH='' cd -- "$SKILL_DIR" 2>/dev/null && pwd)" || {
    echo -e "${RED}Error: Cannot access directory: $SKILL_DIR${NC}" >&2
    exit 1
}
SKILL_DIR="$resolved_skill_dir"

# The skill is named after the last component of its directory path
SKILL_NAME="$(basename "$SKILL_DIR")"

# Banner
echo "========================================"
printf 'Grading Output for: %s\n' "$SKILL_NAME"
echo "========================================"
echo ""

# Mention an earlier test run if its results file is present
if [[ -f "$SKILL_DIR/evals/test-results.json" ]]; then
    echo -e "${BLUE}Found previous test results${NC}"
    echo ""
fi

# Checklist entries, each written as "Category: description".
# The order here is the order in which the questions are asked.
CRITERIA=()
CRITERIA+=(
    "Correctness: Output matches expected result"
    "Correctness: No factual errors"
    "Correctness: Logic is sound"
    "Correctness: Edge cases handled appropriately"
)
CRITERIA+=(
    "Completeness: All requested tasks completed"
    "Completeness: No steps skipped"
    "Completeness: Appropriate level of detail"
    "Completeness: Relevant context included"
)
CRITERIA+=(
    "Format: Output follows specified format"
    "Format: Consistent with examples in skill"
    "Format: Easy to read and understand"
)
CRITERIA+=(
    "Triggering: Skill activated when appropriate"
    "Triggering: Did not activate when inappropriate"
)
CRITERIA+=(
    "Efficiency: No unnecessary steps"
    "Efficiency: Reasonable response length"
    "Efficiency: Not overly verbose"
)

# Filled in while grading: one verdict per criterion, plus free-text issues
GRADES=()
ISSUES=()

# Ask a yes/no question and wait for a single-key answer.
#
# Arguments: $1 - prompt text; " (y/n): " is appended to it
# Input:     reads one character at a time from stdin (into REPLY)
# Outputs:   a hint on stdout after an invalid key; an error on stderr at EOF
# Returns:   0 for y/Y, 1 for n/N; any other key asks again.
#            Exits the script with status 1 if stdin ends before a valid
#            answer has been read.
ask_yes_no() {
    local prompt="$1"
    while true; do
        # Callers use this function as an `if` condition, where `set -e` is
        # suspended, so a failed read has to be handled here. Without this
        # check, end-of-input leaves REPLY empty and the loop never ends.
        if ! read -p "$prompt (y/n): " -n 1 -r; then
            echo
            echo "Error: Unexpected end of input" >&2
            exit 1
        fi
        echo
        case "$REPLY" in
            [Yy])
                return 0
                ;;
            [Nn])
                return 1
                ;;
            *)
                echo "Please enter y or n"
                ;;
        esac
    done
}

# Explain what is about to happen and wait for the user to press Enter
printf '%s\n' \
    "This checklist will help you systematically evaluate the skill output." \
    "Answer each question based on the test results you observed." \
    ""
read -p "Press Enter to begin grading..."
echo ""

# Ask one yes/no question per criterion and record the verdict
echo "========================================"
echo -e "${CYAN}Grading Criteria${NC}"
echo "========================================"
echo ""

for criterion in "${CRITERIA[@]}"; do
    # Entries have the form "Category: description"
    category=${criterion%%:*}
    description=${criterion#*: }

    echo -e "${BLUE}[$category]${NC} $description"

    if ! ask_yes_no "  Does it meet this criterion"; then
        GRADES+=("$criterion: FAIL")
        echo -e "  ${RED}✗ Fail${NC}"

        # A failure may come with a one-line explanation; blank input is dropped
        echo "  Briefly describe the issue:"
        read -r issue
        [[ -z "$issue" ]] || ISSUES+=("[$category] $description: $issue")
    else
        GRADES+=("$criterion: PASS")
        echo -e "  ${GREEN}✓ Pass${NC}"
    fi
    echo ""
done

# Overall assessment
echo "========================================"
echo -e "${CYAN}Overall Assessment${NC}"
echo "========================================"
echo ""

printf '%s\n' \
    "Overall Result:" \
    "  [p] Pass - All or most criteria met" \
    "  [f] Fail - Significant issues found" \
    "  [i] Incomplete - Needs more testing" \
    ""

# Keep asking until one of the accepted keys has set OVERALL_RESULT
OVERALL_RESULT=""
until [[ -n "$OVERALL_RESULT" ]]; do
    read -p "Overall result (p/f/i): " -n 1 -r
    echo
    case $REPLY in
        [Pp]) OVERALL_RESULT="pass" ;;
        [Ff]) OVERALL_RESULT="fail" ;;
        [Ii]) OVERALL_RESULT="incomplete" ;;
        *)    echo "Please enter p, f, or i" ;;
    esac
done

echo ""

# Priority assessment
printf '%s\n' \
    "Priority of fixes needed:" \
    "  [h] High - Critical issues, skill not usable" \
    "  [m] Medium - Important issues, skill partially works" \
    "  [l] Low - Minor issues, skill mostly works" \
    ""

# Same pattern as above: loop until a valid key has set PRIORITY
PRIORITY=""
until [[ -n "$PRIORITY" ]]; do
    read -p "Priority (h/m/l): " -n 1 -r
    echo
    case $REPLY in
        [Hh]) PRIORITY="high" ;;
        [Mm]) PRIORITY="medium" ;;
        [Ll]) PRIORITY="low" ;;
        *)    echo "Please enter h, m, or l" ;;
    esac
done

echo ""

# Suggested fixes: a single free-text line, which may be left empty
echo -e "${BLUE}Suggested Improvements (optional):${NC}"
echo "Describe what changes would address the issues:"
read -r SUGGESTED_FIXES

echo ""

# Pattern analysis: classify the issues as systemic or isolated
echo "========================================"
echo -e "${CYAN}Pattern Analysis${NC}"
echo "========================================"
echo ""

if ! ask_yes_no "Did the same issue appear in multiple test cases"; then
    PATTERN="isolated"
    echo "Issues appear to be isolated to specific cases."
else
    PATTERN="systemic"
    printf '%s\n' \
        "This suggests a systemic problem. Consider:" \
        "  - Fixing the root cause rather than symptoms" \
        "  - Adding a helper script for repeated tasks" \
        "  - Clarifying instructions in SKILL.md"
fi

echo ""

# Extract to script recommendation; stored as the bare words true/false
# because the value is later written into the report as a JSON boolean
EXTRACT_SCRIPT="false"
if ask_yes_no "Should any repeated work be extracted to a script"; then
    echo "Consider creating a script in scripts/ directory."
    EXTRACT_SCRIPT="true"
fi

echo ""

# Generate grading report

# Escape a string so it can be placed inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout, without a trailing newline
json_escape() {
    local text="${1-}"
    text=${text//\\/\\\\} # backslash first, so later escapes are not doubled
    text=${text//\"/\\\"}
    text=${text//$'\n'/\\n}
    text=${text//$'\r'/\\r}
    text=${text//$'\t'/\\t}
    printf '%s' "$text"
}

# Render all arguments as a JSON array of strings ("[]" when there are none).
# Arguments: $@ - raw strings
# Outputs:   the JSON array on stdout, without a trailing newline
json_array() {
    local out="" item
    for item in "$@"; do
        out+="${out:+,}\"$(json_escape "$item")\""
    done
    printf '[%s]' "$out"
}

TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Issue descriptions are free text typed by the user, so they are escaped.
# The ${arr[@]+...} form keeps empty arrays from tripping `set -u` on
# older bash versions.
ISSUES_JSON="$(json_array ${ISSUES[@]+"${ISSUES[@]}"})"
GRADES_JSON="$(json_array ${GRADES[@]+"${GRADES[@]}"})"

# Calculate pass rate
TOTAL_CRITERIA=${#CRITERIA[@]}
PASSED_COUNT=0
for grade in ${GRADES[@]+"${GRADES[@]}"}; do
    if [[ "$grade" == *"PASS" ]]; then
        # Plain assignment on purpose: ((PASSED_COUNT++)) returns status 1
        # when the old value is 0, which aborts the script under `set -e`.
        PASSED_COUNT=$((PASSED_COUNT + 1))
    fi
done

# Integer percentage; guarded so an empty checklist cannot divide by zero
PASS_RATE=0
if [[ $TOTAL_CRITERIA -gt 0 ]]; then
    PASS_RATE=$((PASSED_COUNT * 100 / TOTAL_CRITERIA))
fi

# The evals/ directory may not exist yet for a skill that was never tested
REPORT_FILE="$SKILL_DIR/evals/grading-report.json"
mkdir -p -- "$SKILL_DIR/evals"

cat > "$REPORT_FILE" << EOF
{
  "skill_name": "$(json_escape "$SKILL_NAME")",
  "timestamp": "$TIMESTAMP",
  "overall_result": "$OVERALL_RESULT",
  "priority": "$PRIORITY",
  "pass_rate": $PASS_RATE,
  "criteria_passed": $PASSED_COUNT,
  "criteria_total": $TOTAL_CRITERIA,
  "pattern_analysis": "$PATTERN",
  "extract_script_recommended": $EXTRACT_SCRIPT,
  "detailed_grades": $GRADES_JSON,
  "issues": $ISSUES_JSON,
  "suggested_fixes": "$(json_escape "$SUGGESTED_FIXES")"
}
EOF

# Confirm that the report was written and recap the key figures
echo "========================================"
echo -e "${GREEN}Grading Report Generated${NC}"
echo "========================================"
echo ""
echo "Saved to: evals/grading-report.json"
echo ""
echo "Summary:"
printf '  Overall: %s\n' "$OVERALL_RESULT"
printf '  Priority: %s\n' "$PRIORITY"
printf '  Pass Rate: %s%% (%s/%s criteria)\n' "$PASS_RATE" "$PASSED_COUNT" "$TOTAL_CRITERIA"
printf '  Pattern: %s issues\n' "$PATTERN"
echo ""

# List the recorded issue descriptions, one bullet per issue
if (( ${#ISSUES[@]} > 0 )); then
    echo -e "${YELLOW}Issues Found:${NC}"
    printf '  - %s\n' "${ISSUES[@]}"
    echo ""
fi

# Next steps: closing advice that depends on the overall result
echo "========================================"
echo "Next Steps"
echo "========================================"
echo ""

if [[ "$OVERALL_RESULT" == "pass" ]]; then
    echo -e "${GREEN}✓ Skill is working well!${NC}"
    echo ""
    echo "Consider:"
    echo "  - Adding more edge case tests"
    echo "  - Optimizing the description"
    echo "  - Documenting the skill"
else
    echo "To improve the skill:"
    echo ""
    echo "1. Review grading-report.json for details"
    echo "2. Update SKILL.md based on the issues found"
    echo ""
    # The helper-script step is optional, so the remaining steps are numbered
    # at run time; hard-coded numbers left a gap (1, 2, 4) when it was skipped.
    next_step=3
    if [[ "$EXTRACT_SCRIPT" == "true" ]]; then
        echo "${next_step}. Create helper scripts for repeated tasks"
        echo "   - Place scripts in scripts/ directory"
        echo "   - Update SKILL.md to reference them"
        echo ""
        next_step=$((next_step + 1))
    fi
    echo "${next_step}. Re-run tests to verify improvements:"
    echo "   ~/.config/opencode/skills/skill-builder/scripts/run-tests.sh $SKILL_DIR"
fi

echo ""
echo -e "${GREEN}Done!${NC}"