# cs249r_book/.github/workflows/all-contributors-add.yml

# =============================================================================
# ALL-CONTRIBUTORS AUTO-ADD WORKFLOW (LLM-Powered)
# =============================================================================
# Automatically adds contributors when someone comments with @all-contributors.
#
# Username is extracted DETERMINISTICALLY via regex from @mentions.
# Uses Ollama LLM ONLY to classify contribution type(s) from natural language.
#
# Project detection is DETERMINISTIC (not LLM-guessed); MULTIPLE projects supported:
#   - Explicit mention in comment: "in TinyTorch", "for book, kits" → adds to all mentioned
#   - PR file paths (single project only): tinytorch/ → tinytorch, book/ → book, etc.
#   - Issue labels/title context
#   - If none of the above → asks the user (never silently defaults)
#
# Flexible formats - all of these work:
#   @all-contributors @username helped verify the fix worked
#   @all-contributors please add @jane-doe for Doc in TinyTorch
#   @all-contributors @user123 for code, doc in tinytorch, book
#   @all-contributors @dev42 implemented feature and wrote tests in tinytorch
# =============================================================================

name: '🤖 All Contributors Add'

# Fires for every new or edited comment; the job-level `if` below narrows this
# down to comments that contain the trigger phrase.
on:
  issue_comment:
    types: [created, edited]

# =============================================================================
# CONFIGURATION - Edit these values to customize the workflow
# =============================================================================
env:
  # LLM Configuration
  # Model name handed to the Ollama action in the classification step.
  LLM_MODEL: 'llama3.1:8b'

  # Git Configuration
  # Branch that is checked out, rebased onto, and pushed to.
  TARGET_BRANCH: 'dev'

  # Valid contribution types (comma-separated)
  # The LLM answer is filtered against this list; anything else is discarded.
  CONTRIBUTION_TYPES: 'bug,code,doc,design,ideas,review,test,tool'

  # Valid projects (comma-separated)
  # Each name is also the top-level directory that holds the project's
  # `.all-contributorsrc` and is matched against PR file paths.
  PROJECTS: 'book,tinytorch,kits,labs'

  # Project aliases (format: alias1:project1,alias2:project2)
  PROJECT_ALIASES: 'tito:tinytorch'

jobs:
  add-contributor:
    name: Add Contributor
    # Only run if comment contains the trigger phrase
    if: contains(github.event.comment.body, '@all-contributors')
    runs-on: ubuntu-latest

    # contents: the job commits and pushes the updated contributor files.
    # issues / pull-requests: the job reacts to the trigger comment and replies.
    permissions:
      contents: write
      issues: write
      pull-requests: write

    steps:
      # =====================================================================
      # STEP 1: Extract trigger line + detect project from PR files
      # =====================================================================
      - name: Extract trigger line, username, and detect project
        id: extract
        uses: actions/github-script@v8
        env:
          PROJECTS: ${{ env.PROJECTS }}
          PROJECT_ALIASES: ${{ env.PROJECT_ALIASES }}
        with:
          script: |
            // Parses the triggering comment and publishes these step outputs:
            //   should_run, trigger_line, username, issue_context,
            //   projects (JSON array), project (first entry or ''), project_source
            const body = context.payload.comment.body || '';

            // Find the line containing @all-contributors. Comment bodies may use
            // CRLF line endings, so split on both to avoid a trailing '\r'.
            const lines = body.split(/\r?\n/);
            const triggerLine = lines.find(line => line.includes('@all-contributors'));

            if (!triggerLine) {
              console.log('No @all-contributors line found');
              core.setOutput('should_run', 'false');
              return;
            }

            console.log('Trigger line:', triggerLine);

            // --- Configuration ---
            const validProjects = process.env.PROJECTS.split(',');
            const projectAliases = {};
            if (process.env.PROJECT_ALIASES) {
              process.env.PROJECT_ALIASES.split(',').forEach(pair => {
                const [alias, proj] = pair.split(':');
                if (alias && proj) projectAliases[alias.trim()] = proj.trim();
              });
            }

            // --- Helper: whole-token match ---
            // A name only counts when it is not glued to other letters/digits, so
            // "facebook" does not select `book` and "competitor" does not select
            // the `tito` alias. Separators such as '-', '_', ':' and '/' still
            // delimit tokens, which keeps labels like "area:book" working.
            const escapeRegExp = (s) => s.replace(/[.*+?^$(){}|[\]\\]/g, '\\$&');
            const containsToken = (text, token) =>
              new RegExp(`(?<![a-z0-9])${escapeRegExp(token)}(?![a-z0-9])`).test(text);

            // --- Helper: detect ALL project names in text (for multi-project support) ---
            const detectProjectsInText = (text) => {
              // Drop @mentions first: a login such as "@booker" is a person,
              // not a project reference.
              const lower = text.toLowerCase().replace(/@[\w-]+/g, ' ');
              const found = new Set();
              for (const p of validProjects) {
                if (containsToken(lower, p)) found.add(p);
              }
              for (const [alias, proj] of Object.entries(projectAliases)) {
                if (containsToken(lower, alias)) found.add(proj);
              }
              return [...found];
            };

            // --- Get issue/PR context ---
            const issue = context.payload.issue;
            const labels = (issue.labels || []).map(l => (l.name || '').toLowerCase());
            const issueContext = `Issue title: ${issue.title}\nLabels: ${labels.join(', ') || 'none'}`;

            // =============================================================
            // PROJECT DETECTION (deterministic, priority order)
            // Supports multiple projects in one comment, e.g. "in TinyTorch, Book, Kits"
            // =============================================================
            let projects = [];
            let projectSource = 'unknown';

            // Priority 1: Explicit mention(s) in the trigger comment (can be multiple)
            const commentProjects = detectProjectsInText(triggerLine);
            if (commentProjects.length > 0) {
              projects = commentProjects;
              projectSource = 'comment';
              console.log(`Projects from comment: ${JSON.stringify(projects)}`);
            }

            // Priority 2: PR changed files (top-level dir → project)
            if (projects.length === 0 && issue.pull_request) {
              try {
                // Paginate so PRs with more than 100 changed files are fully inspected.
                const files = await github.paginate(github.rest.pulls.listFiles, {
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  pull_number: issue.number,
                  per_page: 100
                });

                const projectCounts = {};
                for (const file of files) {
                  const topDir = file.filename.split('/')[0];
                  if (validProjects.includes(topDir)) {
                    projectCounts[topDir] = (projectCounts[topDir] || 0) + 1;
                  }
                }

                const detected = Object.keys(projectCounts);
                console.log('PR file project counts:', JSON.stringify(projectCounts));

                if (detected.length === 1) {
                  projects = [detected[0]];
                  projectSource = 'pr_files';
                  console.log(`Project from PR files: "${projects[0]}"`);
                } else if (detected.length > 1) {
                  // Leave `projects` empty so a later step asks the user to choose.
                  projectSource = 'ambiguous';
                  console.log('PR spans multiple projects:', detected.join(', '));
                }
              } catch (e) {
                // Best effort: fall through to the issue-context heuristic.
                console.log('Could not fetch PR files:', e.message);
              }
            }

            // Priority 3: Issue labels / title
            if (projects.length === 0) {
              const contextProjects = detectProjectsInText(issueContext);
              if (contextProjects.length > 0) {
                projects = contextProjects;
                projectSource = 'issue_context';
                console.log(`Projects from issue context: ${JSON.stringify(projects)}`);
              }
            }

            console.log(`Final projects: ${JSON.stringify(projects)} (source: ${projectSource})`);

            // =============================================================
            // USERNAME EXTRACTION (deterministic — regex, not LLM)
            // The first @mention other than the bot trigger is the contributor.
            // =============================================================
            const mentions = triggerLine.match(/@([\w][\w-]*)/g);
            const cleanMentions = mentions
              ? mentions.map(m => m.replace(/^@/, '')).filter(m => m !== 'all-contributors')
              : [];

            const username = cleanMentions.length > 0 ? cleanMentions[0] : '';
            console.log(`Username from @mention: "${username}"`);

            if (!username) {
              console.log('No username @mention found in trigger line');
            }

            core.setOutput('should_run', 'true');
            core.setOutput('trigger_line', triggerLine);
            core.setOutput('username', username);
            core.setOutput('issue_context', issueContext);
            core.setOutput('projects', JSON.stringify(projects));
            core.setOutput('project', projects.length > 0 ? projects[0] : '');
            core.setOutput('project_source', projectSource);

      # =====================================================================
      # STEP 2: LLM classifies contribution types ONLY (username is from regex)
      # =====================================================================
      - name: Classify contribution types with LLM
        # Skipped when no username was extracted; the following step then
        # reports `no_username` without needing a model answer.
        if: steps.extract.outputs.should_run == 'true' && steps.extract.outputs.username != ''
        uses: ai-action/ollama-action@v2
        id: llm
        with:
          model: ${{ env.LLM_MODEL }}
          # The trigger line is untrusted user text. The model's answer is only
          # used after the next step filters it against CONTRIBUTION_TYPES.
          prompt: |
            Classify the contribution type(s) from this comment.

            COMMENT: ${{ steps.extract.outputs.trigger_line }}

            CONTRIBUTION TYPES (pick one or more):
            - bug: Found or reported a bug, identified issues
            - code: Wrote code, implemented features, fixed bugs
            - doc: Wrote documentation, improved docs, fixed typos
            - design: UI/UX design, visual design, architecture design
            - ideas: Suggested ideas, proposed features, brainstormed
            - review: Reviewed code or PRs, gave feedback on changes
            - test: Tested features, verified fixes, QA testing
            - tool: Built tools, scripts, automation, CLI utilities

            Return ONLY a JSON object with exactly this field:
            {
              "types": ["<contribution-type>"]
            }

            RULES:
            - types: Array of one or more contribution types from the list above.
            - Do NOT include username or project fields. Those are detected separately.

            EXAMPLES:
            Input: "@all-contributors @jane-doe fixed typos in the documentation"
            Output: {"types": ["doc"]}

            Input: "@all-contributors @dev42 implemented the new feature and wrote tests"
            Output: {"types": ["code", "test"]}

            Input: "@all-contributors please add @user123 for code"
            Output: {"types": ["code"]}

            Input: "@all-contributors @reviewer99 gave feedback on the PR"
            Output: {"types": ["review"]}

            Return ONLY the JSON object, no explanation or other text.

      # =====================================================================
      # STEP 3: Parse LLM types + combine with deterministic username & project
      # =====================================================================
      - name: Validate and combine results
        if: steps.extract.outputs.should_run == 'true'
        id: parse
        uses: actions/github-script@v8
        env:
          LLM_RESPONSE: ${{ steps.llm.outputs.response || '' }}
          USERNAME: ${{ steps.extract.outputs.username }}
          PROJECTS_JSON: ${{ steps.extract.outputs.projects }}
          PROJECT_SOURCE: ${{ steps.extract.outputs.project_source }}
          CONTRIBUTION_TYPES: ${{ env.CONTRIBUTION_TYPES }}
          PROJECTS: ${{ env.PROJECTS }}
        with:
          script: |
            // Combines the regex-extracted username and projects with the LLM's
            // type classification. Outputs:
            //   success ('true' | 'false')
            //   error   ('no_username' | 'no_types' | 'no_project') when success is 'false'
            //   username, types (JSON array), projects (JSON array), project, project_source
            const response = process.env.LLM_RESPONSE || '';
            const username = process.env.USERNAME || '';
            const validTypes = process.env.CONTRIBUTION_TYPES.split(',');
            const validProjects = process.env.PROJECTS.split(',');

            let projects = [];
            try {
              projects = JSON.parse(process.env.PROJECTS_JSON || '[]');
              if (!Array.isArray(projects)) projects = [];
            } catch (e) {
              console.log('Failed to parse projects JSON');
            }
            const projectSource = process.env.PROJECT_SOURCE || '';

            console.log('Username (from regex):', username);
            console.log('LLM response:', response);
            console.log('Projects:', JSON.stringify(projects), `(source: ${projectSource})`);

            // --- Validate username (extracted deterministically in Step 1) ---
            if (!username) {
              core.setOutput('success', 'false');
              core.setOutput('error', 'no_username');
              return;
            }

            // --- Parse contribution types from LLM response ---
            let types = [];
            try {
              // The first {...} span is taken as the answer; surrounding chatter is ignored.
              const jsonMatch = response.match(/\{[\s\S]*?\}/);
              if (jsonMatch) {
                const parsed = JSON.parse(jsonMatch[0]);
                if (parsed && Array.isArray(parsed.types)) {
                  const normalized = parsed.types
                    // Skip non-string entries instead of letting one bad element
                    // throw and discard every valid type.
                    .filter(t => typeof t === 'string')
                    .map(t => t.toLowerCase().trim())
                    .filter(t => validTypes.includes(t));
                  // De-duplicate so repeated answers are not written to the config.
                  types = [...new Set(normalized)];
                }
              }
            } catch (e) {
              console.log('Failed to parse LLM JSON:', e.message);
            }

            // --- Validate types ---
            if (types.length === 0) {
              core.setOutput('success', 'false');
              core.setOutput('error', 'no_types');
              core.setOutput('username', username);
              return;
            }

            // --- Validate projects (one or more, all must be valid) ---
            const validProjectList = projects.filter(p => p && validProjects.includes(p));
            if (validProjectList.length === 0) {
              console.log('No valid project(s) detected — will ask user');
              core.setOutput('success', 'false');
              core.setOutput('error', 'no_project');
              core.setOutput('username', username);
              core.setOutput('types', JSON.stringify(types));
              core.setOutput('project_source', projectSource);
              return;
            }

            // --- All good (may have multiple projects) ---
            console.log('Final result:', { username, types, projects: validProjectList, projectSource });

            core.setOutput('success', 'true');
            core.setOutput('username', username);
            core.setOutput('types', JSON.stringify(types));
            core.setOutput('projects', JSON.stringify(validProjectList));
            core.setOutput('project', validProjectList[0]);
            core.setOutput('project_source', projectSource);

      # =====================================================================
      # STEP 4: Checkout, update config, generate READMEs, commit
      # =====================================================================
      - name: Checkout repository
        uses: actions/checkout@v6
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        with:
          # Full history of the target branch, which later steps commit to,
          # rebase onto, and push.
          fetch-depth: 0
          ref: ${{ env.TARGET_BRANCH }}

      - name: Setup Python
        uses: actions/setup-python@v6
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        with:
          # Quoted so the version is read as a string rather than a float.
          python-version: "3.11"

      - name: Get user info from GitHub
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        id: userinfo
        uses: actions/github-script@v8
        env:
          # Passed through the environment rather than interpolated into the
          # script source, so the value can never be parsed as JavaScript.
          USERNAME: ${{ steps.parse.outputs.username }}
        with:
          script: |
            // Resolves the contributor's display name, avatar and profile URL.
            // Outputs: name, avatar_url, profile, found ('true' | 'false').
            const username = process.env.USERNAME || '';

            try {
              const { data: user } = await github.rest.users.getByUsername({
                username: username
              });

              // Accounts without a display name fall back to the login.
              core.setOutput('name', user.name || username);
              core.setOutput('avatar_url', user.avatar_url);
              core.setOutput('profile', user.html_url);
              core.setOutput('found', 'true');
            } catch (error) {
              // Lookup failed: derive conventional URLs from the login instead.
              console.log(`User ${username} not found, using defaults`);
              core.setOutput('name', username);
              core.setOutput('avatar_url', `https://avatars.githubusercontent.com/${username}`);
              core.setOutput('profile', `https://github.com/${username}`);
              core.setOutput('found', 'false');
            }

      - name: Update contributor config
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        env:
          PROJECTS: ${{ env.PROJECTS }}
          # Step outputs are handed over as environment variables instead of being
          # interpolated into the Python source. The profile display name in
          # particular is free-form user text; a quote or backslash in it would
          # otherwise break the script or be executed as code.
          TARGET_PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
          CONTRIB_USERNAME: ${{ steps.parse.outputs.username }}
          CONTRIB_TYPES_JSON: ${{ steps.parse.outputs.types }}
          CONTRIB_NAME: ${{ steps.userinfo.outputs.name }}
          CONTRIB_AVATAR_URL: ${{ steps.userinfo.outputs.avatar_url }}
          CONTRIB_PROFILE: ${{ steps.userinfo.outputs.profile }}
        run: |
          python3 << 'EOF'
          import json
          import os

          # Adds the contributor to (or merges new contribution types into) the
          # `.all-contributorsrc` of every selected project.
          projects = json.loads(os.environ['TARGET_PROJECTS_JSON'])
          username = os.environ['CONTRIB_USERNAME']
          types = json.loads(os.environ['CONTRIB_TYPES_JSON'])
          name = os.environ.get('CONTRIB_NAME', '') or username
          avatar_url = os.environ.get('CONTRIB_AVATAR_URL', '')
          profile = os.environ.get('CONTRIB_PROFILE', '')

          valid_projects = os.environ.get('PROJECTS', 'book').split(',')
          config_paths = {p: f'{p}/.all-contributorsrc' for p in valid_projects}
          updated_paths = []

          for project in projects:
              config_path = config_paths.get(project)
              if not config_path or not os.path.isfile(config_path):
                  print(f"Skipping {project}: no config at {config_path}")
                  continue

              with open(config_path, 'r') as f:
                  config = json.load(f)

              contributors = config.get('contributors', [])

              # Logins are compared case-insensitively.
              existing = next(
                  (i for i, c in enumerate(contributors)
                   if c.get('login', '').lower() == username.lower()),
                  None,
              )

              if existing is not None:
                  # Known contributor: union the new types with the recorded ones.
                  merged = sorted(set(contributors[existing].get('contributions', [])) | set(types))
                  contributors[existing]['contributions'] = merged
                  print(f"[{project}] Updated existing contributor {username} with types: {merged}")
              else:
                  new_types = sorted(set(types))
                  contributors.append({
                      'login': username,
                      'name': name,
                      'avatar_url': avatar_url,
                      'profile': profile,
                      'contributions': new_types
                  })
                  print(f"[{project}] Added new contributor {username} with types: {new_types}")

              config['contributors'] = contributors

              with open(config_path, 'w') as f:
                  json.dump(config, f, indent=4)
                  f.write('\n')

              print(f"Updated {config_path}")
              updated_paths.append(config_path)

          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
              f.write(f"updated_configs={json.dumps(updated_paths)}\n")
          EOF

      - name: Generate README tables
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        env:
          # Handed over via the environment instead of being pasted into the
          # shell script text.
          PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
        run: |
          # Quoted so a workspace path containing spaces stays a single argument.
          SCRIPTS_DIR="${GITHUB_WORKSPACE}/.github/workflows/contributors"

          # Generate tables for each project we updated
          for project in $(echo "$PROJECTS_JSON" | python3 -c "import sys,json; print(' '.join(json.load(sys.stdin)))"); do
            python3 "${SCRIPTS_DIR}/generate_readme_tables.py" --project "$project" --update
          done
          # Also regenerate the main README
          python3 "${SCRIPTS_DIR}/generate_main_readme.py"

      - name: Configure Git
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        run: |
          # Identity used for the commit created in the next step.
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      - name: Commit and push changes
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        env:
          # Handed over via the environment instead of being pasted into the
          # shell script text.
          PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
          TYPES_JSON: ${{ steps.parse.outputs.types }}
          CONTRIBUTOR: ${{ steps.parse.outputs.username }}
        run: |
          TYPES=$(echo "$TYPES_JSON" | python3 -c "import sys,json; print(', '.join(json.load(sys.stdin)))")
          PROJECTS_LIST=$(echo "$PROJECTS_JSON" | python3 -c "import sys,json; print(', '.join(json.load(sys.stdin)))")

          # Stage contributor files for each project.
          # Each path gets its own `git add`: a single call with several paths
          # stages nothing at all when any one of them is missing, which would
          # silently drop the config update for a project without a README.
          for project in $(echo "$PROJECTS_JSON" | python3 -c "import sys,json; print(' '.join(json.load(sys.stdin)))"); do
            git add -- "${project}/.all-contributorsrc" 2>/dev/null || true
            git add -- "${project}/README.md" 2>/dev/null || true
          done
          git add -- README.md 2>/dev/null || true

          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            git commit -m "docs: add @${CONTRIBUTOR} as contributor for ${TYPES} (${PROJECTS_LIST})"
            # Rebase onto anything pushed since checkout before publishing.
            git pull --rebase origin "${TARGET_BRANCH}"
            git push origin "${TARGET_BRANCH}"
            echo "Changes committed and pushed!"
          fi

      # =====================================================================
      # STEP 5: Post success comment
      # =====================================================================
      - name: React to comment
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
        uses: actions/github-script@v8
        env:
          # All dynamic values reach the script through the environment.
          # TRIGGER_LINE is raw commenter-controlled text; interpolating it into
          # the script source (for example inside a template literal) would let
          # any commenter run arbitrary JavaScript with this job's token.
          USERNAME: ${{ steps.parse.outputs.username }}
          PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
          PROJECT_SOURCE: ${{ steps.parse.outputs.project_source }}
          TYPES_JSON: ${{ steps.parse.outputs.types }}
          TRIGGER_LINE: ${{ steps.extract.outputs.trigger_line }}
        with:
          script: |
            // Acknowledge the request with a thumbs-up on the trigger comment,
            // then post a summary of what was recorded.
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: '+1'
            });

            const username = process.env.USERNAME || '';
            const projects = JSON.parse(process.env.PROJECTS_JSON || '[]');
            const projectSource = process.env.PROJECT_SOURCE || '';
            const types = JSON.parse(process.env.TYPES_JSON || '[]');
            const triggerLine = process.env.TRIGGER_LINE || '';

            // Human-readable explanation of how the project(s) were determined.
            const sourceLabels = {
              comment: 'explicitly mentioned in comment',
              pr_files: 'detected from PR changed files',
              issue_context: 'detected from issue labels/title'
            };
            const sourceNote = sourceLabels[projectSource] || projectSource;

            const projectList = projects.join(', ');
            const filesList = projects.map(p => `- \`${p}/.all-contributorsrc\`, \`${p}/README.md\``).join('\n');

            const body = [
              "I've added @" + username + " as a contributor to **" + projectList + "**! :tada:",
              "",
              "**Recognized for:** " + types.join(', '),
              "**Project(s):** " + projectList + " (" + sourceNote + ")",
              "**Based on:** " + triggerLine,
              "",
              "The contributor list has been updated in:",
              filesList,
              "- Main `README.md`",
              "",
              "We love recognizing our contributors! :heart:"
            ].join('\n');

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: body
            });

      # =====================================================================
      # STEP 6: Handle failures — ask user when project is unknown
      # =====================================================================
      - name: Handle parsing failure
        if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'false'
        uses: actions/github-script@v8
        env:
          PROJECTS: ${{ env.PROJECTS }}
          CONTRIBUTION_TYPES: ${{ env.CONTRIBUTION_TYPES }}
          # All dynamic values reach the script through the environment.
          # TRIGGER_LINE is raw commenter-controlled text; interpolating it into
          # the script source (for example inside a template literal) would let
          # any commenter run arbitrary JavaScript with this job's token.
          PARSE_ERROR: ${{ steps.parse.outputs.error }}
          TRIGGER_LINE: ${{ steps.extract.outputs.trigger_line }}
          PROJECT_SOURCE: ${{ steps.parse.outputs.project_source }}
          USERNAME: ${{ steps.parse.outputs.username }}
          TYPES_JSON: ${{ steps.parse.outputs.types }}
        with:
          script: |
            // Replies with guidance tailored to the validation error:
            // 'no_project', 'no_username' or 'no_types'.
            const error = process.env.PARSE_ERROR || '';
            const triggerLine = process.env.TRIGGER_LINE || '';
            const projects = process.env.PROJECTS.split(',');
            const contributionTypes = (process.env.CONTRIBUTION_TYPES || '').split(',');
            const projectSource = process.env.PROJECT_SOURCE || '';
            const username = process.env.USERNAME || '';
            const typesRaw = process.env.TYPES_JSON || '[]';
            const types = (() => { try { return JSON.parse(typesRaw); } catch { return []; } })();

            let body;

            if (error === 'no_project') {
              // === PROJECT UNKNOWN — ask the user ===
              // Echo back what was understood so the reply can be copy-pasted.
              const userPart = username ? ` @${username}` : '';
              const typesPart = types.length > 0 ? ` for ${types.join(', ')}` : ' for code';

              if (projectSource === 'ambiguous') {
                body = [
                  "This PR touches files in **multiple projects**, so I need you to tell me which one(s). :thinking:",
                  "",
                  `I detected${userPart}${typesPart}, but which project(s) should I add them to?`,
                  "",
                  "You can specify **one or more** projects in your reply, e.g.:",
                  "- `@all-contributors" + userPart + typesPart + " in tinytorch`",
                  "- `@all-contributors" + userPart + typesPart + " in tinytorch, book, kits`",
                  "",
                  "Options: " + projects.map(p => `\`${p}\``).join(', '),
                ].join('\n');
              } else {
                body = [
                  `I couldn't determine which project(s) to add the contributor to. :thinking:`,
                  "",
                  "**Your comment:** " + triggerLine,
                  "",
                  "This repo has multiple projects. Specify one or more explicitly, e.g.:",
                  "- `@all-contributors" + userPart + typesPart + " in tinytorch`",
                  "- `@all-contributors" + userPart + typesPart + " in TinyTorch, Book, Kits`",
                  "",
                  "**How project detection works:**",
                  "- **In comment:** Say \"in TinyTorch\", \"for book, labs\", etc. (multiple projects OK)",
                  "- **On PRs:** Auto-detected from changed file paths when only one project is touched",
                  "- **On issues:** From labels or title, or specify in the comment",
                ].join('\n');
              }
            } else {
              // === Other errors (no_username, no_types) ===
              let errorMsg = "I couldn't parse that comment.";
              if (error === 'no_username') {
                errorMsg = "I couldn't find a GitHub username in that comment.";
              } else if (error === 'no_types') {
                errorMsg = "I couldn't determine the contribution type.";
              }

              body = [
                errorMsg + " :thinking:",
                "",
                "**Your comment:** " + triggerLine,
                "",
                "**Example formats that work:**",
                "```",
                "@all-contributors @jane-doe fixed typos in the documentation",
                "@all-contributors please add @john_smith for Doc in TinyTorch",
                "@all-contributors @user123 for code, doc in tinytorch, book",
                "@all-contributors @dev42 implemented the new caching feature in tinytorch",
                "```",
                "",
                // Listed from configuration so the help text cannot drift from
                // the types that validation actually accepts.
                "**Contribution types:** " + contributionTypes.join(', '),
                "",
                `**Projects (one or more):** ${projects.join(', ')} — specify in comment (e.g. "in TinyTorch, Book") or auto-detected from PR file paths.`
              ].join('\n');
            }

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: body
            });