Files
cs249r_book/.github/workflows/all-contributors-add.yml
Vijay Janapa Reddi a7f9367e42 Merge dev into feature/book-volumes: CI, contributors, workflows
# Conflicts:
#	README.md
2026-03-02 09:38:47 -05:00

620 lines
27 KiB
YAML

# =============================================================================
# ALL-CONTRIBUTORS AUTO-ADD WORKFLOW (LLM-Powered)
# =============================================================================
# Automatically adds contributors when someone comments with @all-contributors.
#
# Username is extracted DETERMINISTICALLY via regex from @mentions.
# Uses Ollama LLM ONLY to classify contribution type(s) from natural language.
#
# Project detection is DETERMINISTIC (not LLM-guessed); MULTIPLE projects supported:
# - Explicit mention in comment: "in TinyTorch", "for book, kits" → adds to all mentioned
# - PR file paths (single project only): tinytorch/ → tinytorch, book/ → book, etc.
# - Issue labels/title context
# - If none of the above → asks the user (never silently defaults)
#
# Flexible formats - all of these work:
# @all-contributors @username helped verify the fix worked
# @all-contributors please add @jane-doe for Doc in TinyTorch
# @all-contributors @user123 for code, doc in tinytorch, book
# @all-contributors @dev42 implemented feature and wrote tests in tinytorch
# =============================================================================
# Workflow display name.
name: '🤖 All Contributors Add'
# Trigger: runs when a comment on an issue or pull request is created or edited.
on:
issue_comment:
types: [created, edited]
# =============================================================================
# CONFIGURATION - Edit these values to customize the workflow
# =============================================================================
env:
# LLM Configuration: model name passed to the Ollama action in the classification step
LLM_MODEL: 'llama3.1:8b'
# Git Configuration: branch that is checked out, rebased onto, and pushed to
TARGET_BRANCH: 'dev'
# Valid contribution types (comma-separated); LLM output is filtered against this list
CONTRIBUTION_TYPES: 'bug,code,doc,design,ideas,review,test,tool'
# Valid projects (comma-separated); each is also the top-level directory holding
# that project's .all-contributorsrc
PROJECTS: 'book,tinytorch,kits,labs'
# Project aliases (format: alias1:project1,alias2:project2)
PROJECT_ALIASES: 'tito:tinytorch'
jobs:
add-contributor:
name: Add Contributor
# Only run if comment contains the trigger phrase
# NOTE(review): no check on the comment author is visible in this workflow, so any
# commenter appears able to trigger a commit to TARGET_BRANCH — confirm this is intended.
if: contains(github.event.comment.body, '@all-contributors')
runs-on: ubuntu-latest
# contents: write is needed for the commit/push step; issues and pull-requests write
# are needed for the reaction and the reply comments posted by later steps.
permissions:
contents: write
issues: write
pull-requests: write
steps:
# =====================================================================
# STEP 1: Extract trigger line + username, detect project(s) (comment → PR files → issue context)
# =====================================================================
- name: Extract trigger line, username, and detect project
  id: extract
  uses: actions/github-script@v8
  env:
    PROJECTS: ${{ env.PROJECTS }}
    PROJECT_ALIASES: ${{ env.PROJECT_ALIASES }}
  with:
    script: |
      // Reads the triggering comment and publishes deterministic outputs for later steps:
      //   should_run      'true' | 'false'
      //   trigger_line    first comment line containing @all-contributors
      //   username        first @mention other than @all-contributors ('' if none)
      //   issue_context   issue title + labels, as text
      //   projects        JSON array of detected project names (may be empty)
      //   project         first detected project ('' if none)
      //   project_source  comment | pr_files | issue_context | ambiguous | unknown
      const commentBody = context.payload.comment.body || '';

      // Only the first line that carries the trigger phrase is interpreted.
      const triggerLine = commentBody
        .split('\n')
        .find((line) => line.includes('@all-contributors'));
      if (!triggerLine) {
        console.log('No @all-contributors line found');
        core.setOutput('should_run', 'false');
        return;
      }
      console.log('Trigger line:', triggerLine);

      // --- Configuration ---
      const validProjects = process.env.PROJECTS
        .split(',')
        .map((entry) => entry.trim())
        .filter(Boolean);
      const projectAliases = new Map();
      for (const pair of (process.env.PROJECT_ALIASES || '').split(',')) {
        const [alias, target] = pair.split(':').map((part) => part.trim());
        if (alias && target) projectAliases.set(alias.toLowerCase(), target);
      }

      // --- Helper: detect ALL project names in text (for multi-project support) ---
      // Matching is on whole words and ignores @mentions. Plain substring matching
      // produced false positives: "notebook" matched "book", and a username such as
      // @kitsune matched "kits", silently picking the wrong project.
      const escapeRegExp = (text) => text.replace(/[.*+?^$(){}|[\]\\]/g, '\\$&');
      const hasWord = (haystack, word) =>
        new RegExp(`(^|[^a-z0-9_])${escapeRegExp(word.toLowerCase())}([^a-z0-9_]|$)`).test(haystack);
      const detectProjectsInText = (text) => {
        const searchable = text.toLowerCase().replace(/@[\w-]+/g, ' ');
        const found = new Set();
        for (const project of validProjects) {
          if (hasWord(searchable, project)) found.add(project);
        }
        for (const [alias, project] of projectAliases) {
          if (hasWord(searchable, alias)) found.add(project);
        }
        return [...found];
      };

      // --- Get issue/PR context ---
      const issue = context.payload.issue;
      const labels = (issue.labels || []).map((label) => label.name.toLowerCase());
      const issueContext = `Issue title: ${issue.title}\nLabels: ${labels.join(', ') || 'none'}`;

      // =============================================================
      // PROJECT DETECTION (deterministic, priority order)
      // Supports multiple projects in one comment, e.g. "in TinyTorch, Book, Kits"
      // =============================================================
      let projects = [];
      let projectSource = 'unknown';

      // Priority 1: Explicit mention(s) in the trigger comment (can be multiple)
      const commentProjects = detectProjectsInText(triggerLine);
      if (commentProjects.length > 0) {
        projects = commentProjects;
        projectSource = 'comment';
        console.log(`Projects from comment: ${JSON.stringify(projects)}`);
      }

      // Priority 2: PR changed files (top-level dir → project)
      if (projects.length === 0 && issue.pull_request) {
        try {
          // Paginate: a single listFiles call returns at most 100 files, which would
          // hide projects touched later in a large PR.
          const files = await github.paginate(github.rest.pulls.listFiles, {
            owner: context.repo.owner,
            repo: context.repo.repo,
            pull_number: issue.number,
            per_page: 100,
          });
          const projectCounts = {};
          for (const file of files) {
            const topDir = file.filename.split('/')[0];
            if (validProjects.includes(topDir)) {
              projectCounts[topDir] = (projectCounts[topDir] || 0) + 1;
            }
          }
          const detected = Object.keys(projectCounts);
          console.log('PR file project counts:', JSON.stringify(projectCounts));
          if (detected.length === 1) {
            projects = [detected[0]];
            projectSource = 'pr_files';
            console.log(`Project from PR files: "${projects[0]}"`);
          } else if (detected.length > 1) {
            // Do not guess: later steps ask the user which project(s) they meant.
            projectSource = 'ambiguous';
            console.log('PR spans multiple projects:', detected.join(', '));
          }
        } catch (e) {
          // Best effort: fall through to issue-context detection.
          console.log('Could not fetch PR files:', e.message);
        }
      }

      // Priority 3: Issue labels / title
      if (projects.length === 0) {
        const contextProjects = detectProjectsInText(issueContext);
        if (contextProjects.length > 0) {
          projects = contextProjects;
          projectSource = 'issue_context';
          console.log(`Projects from issue context: ${JSON.stringify(projects)}`);
        }
      }
      console.log(`Final projects: ${JSON.stringify(projects)} (source: ${projectSource})`);

      // =============================================================
      // USERNAME EXTRACTION (deterministic — regex, not LLM)
      // =============================================================
      const mentions = triggerLine.match(/@([\w][\w-]*)/g) || [];
      const cleanMentions = mentions
        .map((mention) => mention.replace(/^@/, ''))
        .filter((mention) => mention !== 'all-contributors');
      const username = cleanMentions.length > 0 ? cleanMentions[0] : '';
      console.log(`Username from @mention: "${username}"`);
      if (!username) {
        console.log('No username @mention found in trigger line');
      }

      core.setOutput('should_run', 'true');
      core.setOutput('trigger_line', triggerLine);
      core.setOutput('username', username);
      core.setOutput('issue_context', issueContext);
      core.setOutput('projects', JSON.stringify(projects));
      core.setOutput('project', projects.length > 0 ? projects[0] : '');
      core.setOutput('project_source', projectSource);
# =====================================================================
# STEP 2: LLM classifies contribution types ONLY (username is from regex)
# =====================================================================
# Asks the Ollama-hosted model to classify the contribution type(s) described in the
# trigger line. Runs only when Step 1 produced a non-empty username. The model's reply
# is consumed by the next step through steps.llm.outputs.response, where it is parsed
# and filtered against CONTRIBUTION_TYPES.
- name: Classify contribution types with LLM
if: steps.extract.outputs.should_run == 'true' && steps.extract.outputs.username != ''
uses: ai-action/ollama-action@v2
id: llm
with:
model: ${{ env.LLM_MODEL }}
# The trigger line is commenter-supplied text embedded verbatim in the prompt below.
prompt: |
Classify the contribution type(s) from this comment.
COMMENT: ${{ steps.extract.outputs.trigger_line }}
CONTRIBUTION TYPES (pick one or more):
- bug: Found or reported a bug, identified issues
- code: Wrote code, implemented features, fixed bugs
- doc: Wrote documentation, improved docs, fixed typos
- design: UI/UX design, visual design, architecture design
- ideas: Suggested ideas, proposed features, brainstormed
- review: Reviewed code or PRs, gave feedback on changes
- test: Tested features, verified fixes, QA testing
- tool: Built tools, scripts, automation, CLI utilities
Return ONLY a JSON object with exactly this field:
{
"types": ["<contribution-type>"]
}
RULES:
- types: Array of one or more contribution types from the list above.
- Do NOT include username or project fields. Those are detected separately.
EXAMPLES:
Input: "@all-contributors @jane-doe fixed typos in the documentation"
Output: {"types": ["doc"]}
Input: "@all-contributors @dev42 implemented the new feature and wrote tests"
Output: {"types": ["code", "test"]}
Input: "@all-contributors please add @user123 for code"
Output: {"types": ["code"]}
Input: "@all-contributors @reviewer99 gave feedback on the PR"
Output: {"types": ["review"]}
Return ONLY the JSON object, no explanation or other text.
# =====================================================================
# STEP 3: Parse LLM types + combine with deterministic username & project
# =====================================================================
- name: Validate and combine results
  if: steps.extract.outputs.should_run == 'true'
  id: parse
  uses: actions/github-script@v8
  env:
    LLM_RESPONSE: ${{ steps.llm.outputs.response || '' }}
    USERNAME: ${{ steps.extract.outputs.username }}
    PROJECTS_JSON: ${{ steps.extract.outputs.projects }}
    PROJECT_SOURCE: ${{ steps.extract.outputs.project_source }}
    CONTRIBUTION_TYPES: ${{ env.CONTRIBUTION_TYPES }}
    PROJECTS: ${{ env.PROJECTS }}
  with:
    script: |
      // Combines the regex-extracted username, the detected projects, and the LLM's
      // contribution types into one validated result.
      // Outputs on success: success='true', username, types (JSON), projects (JSON),
      //                     project (first project), project_source.
      // Outputs on failure: success='false', error = no_username | no_types | no_project,
      //                     plus whatever was already determined.
      const splitList = (value) =>
        (value || '').split(',').map((entry) => entry.trim()).filter(Boolean);

      const response = process.env.LLM_RESPONSE || '';
      const username = process.env.USERNAME || '';
      const validTypes = splitList(process.env.CONTRIBUTION_TYPES);
      const validProjects = splitList(process.env.PROJECTS);
      const projectSource = process.env.PROJECT_SOURCE || '';

      let projects = [];
      try {
        const parsedProjects = JSON.parse(process.env.PROJECTS_JSON || '[]');
        if (Array.isArray(parsedProjects)) projects = parsedProjects;
      } catch (e) {
        console.log('Failed to parse projects JSON');
      }

      console.log('Username (from regex):', username);
      console.log('LLM response:', response);
      console.log('Projects:', JSON.stringify(projects), `(source: ${projectSource})`);

      // --- Validate username (extracted deterministically in Step 1) ---
      if (!username) {
        core.setOutput('success', 'false');
        core.setOutput('error', 'no_username');
        return;
      }

      // --- Parse contribution types from LLM response ---
      // Try the shortest {...} span first (skips trailing chatter), then the longest
      // one so that a reply containing a nested object still parses.
      const extractJsonObject = (text) => {
        for (const pattern of [/\{[\s\S]*?\}/, /\{[\s\S]*\}/]) {
          const match = text.match(pattern);
          if (!match) continue;
          try {
            return JSON.parse(match[0]);
          } catch (e) {
            console.log('Failed to parse LLM JSON:', e.message);
          }
        }
        return null;
      };

      const parsed = extractJsonObject(response);
      const rawTypes = parsed && Array.isArray(parsed.types) ? parsed.types : [];
      // Keep only strings (one stray number/null must not discard the valid entries),
      // normalise, restrict to the allow-list, and drop duplicates.
      const types = [...new Set(
        rawTypes
          .filter((entry) => typeof entry === 'string')
          .map((entry) => entry.toLowerCase().trim())
          .filter((entry) => validTypes.includes(entry))
      )];

      // --- Validate types ---
      if (types.length === 0) {
        core.setOutput('success', 'false');
        core.setOutput('error', 'no_types');
        core.setOutput('username', username);
        return;
      }

      // --- Validate projects (one or more, all must be valid) ---
      const validProjectList = projects.filter((p) => p && validProjects.includes(p));
      if (validProjectList.length === 0) {
        console.log('No valid project(s) detected — will ask user');
        core.setOutput('success', 'false');
        core.setOutput('error', 'no_project');
        core.setOutput('username', username);
        core.setOutput('types', JSON.stringify(types));
        core.setOutput('project_source', projectSource);
        return;
      }

      // --- All good (may have multiple projects) ---
      console.log('Final result:', { username, types, projects: validProjectList, projectSource });
      core.setOutput('success', 'true');
      core.setOutput('username', username);
      core.setOutput('types', JSON.stringify(types));
      core.setOutput('projects', JSON.stringify(validProjectList));
      core.setOutput('project', validProjectList[0]);
      core.setOutput('project_source', projectSource);
# =====================================================================
# STEP 4: Checkout, update config, generate READMEs, commit
# =====================================================================
# Checks out TARGET_BRANCH (not the PR head), since that is the branch the later
# commit step pushes to. fetch-depth: 0 requests the full history.
- name: Checkout repository
if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
uses: actions/checkout@v6
with:
ref: ${{ env.TARGET_BRANCH }}
fetch-depth: 0
# Installs the Python interpreter used by the config-update heredoc and the
# README generator scripts in the following steps.
- name: Setup Python
if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Get user info from GitHub
  if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
  id: userinfo
  uses: actions/github-script@v8
  env:
    # Passed through the environment rather than pasted into the script source.
    CONTRIBUTOR_LOGIN: ${{ steps.parse.outputs.username }}
  with:
    script: |
      // Looks up the contributor's public profile and publishes:
      //   name, avatar_url, profile, found ('true' | 'false').
      // When the lookup fails, URLs are derived from the login so the run can continue.
      const username = process.env.CONTRIBUTOR_LOGIN || '';
      try {
        const { data: user } = await github.rest.users.getByUsername({ username });
        core.setOutput('name', user.name || username);
        core.setOutput('avatar_url', user.avatar_url);
        core.setOutput('profile', user.html_url);
        core.setOutput('found', 'true');
      } catch (error) {
        // Not every failure is a 404 (rate limits, network); log what actually happened.
        console.log(`Could not fetch user ${username} (${error.status ?? 'no status'}: ${error.message}), using defaults`);
        core.setOutput('name', username);
        core.setOutput('avatar_url', `https://avatars.githubusercontent.com/${username}`);
        core.setOutput('profile', `https://github.com/${username}`);
        core.setOutput('found', 'false');
      }
- name: Update contributor config
  if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
  env:
    PROJECTS: ${{ env.PROJECTS }}
    # All step outputs are handed over as environment variables. The display name in
    # particular is free text controlled by the contributor; pasting it into the
    # Python source would break on a quote character and allow code injection.
    SELECTED_PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
    CONTRIBUTOR_LOGIN: ${{ steps.parse.outputs.username }}
    CONTRIBUTION_TYPES_JSON: ${{ steps.parse.outputs.types }}
    CONTRIBUTOR_NAME: ${{ steps.userinfo.outputs.name }}
    CONTRIBUTOR_AVATAR_URL: ${{ steps.userinfo.outputs.avatar_url }}
    CONTRIBUTOR_PROFILE: ${{ steps.userinfo.outputs.profile }}
  run: |
    python3 << 'EOF'
    # Adds the contributor to (or merges new contribution types into) the
    # <project>/.all-contributorsrc file of every selected project, then records the
    # list of rewritten files in the step output "updated_configs".
    import json
    import os

    projects = json.loads(os.environ.get('SELECTED_PROJECTS_JSON') or '[]')
    username = os.environ['CONTRIBUTOR_LOGIN']
    # De-duplicate so a new entry never lists the same contribution twice.
    types = sorted(set(json.loads(os.environ.get('CONTRIBUTION_TYPES_JSON') or '[]')))
    name = os.environ.get('CONTRIBUTOR_NAME') or username
    avatar_url = os.environ.get('CONTRIBUTOR_AVATAR_URL', '')
    profile = os.environ.get('CONTRIBUTOR_PROFILE', '')

    valid_projects = os.environ.get('PROJECTS', 'book').split(',')
    config_paths = {p: f'{p}/.all-contributorsrc' for p in valid_projects}
    updated_paths = []

    for project in projects:
        config_path = config_paths.get(project)
        if not config_path or not os.path.isfile(config_path):
            print(f"Skipping {project}: no config at {config_path}")
            continue

        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        contributors = config.get('contributors', [])
        # Logins are compared case-insensitively.
        existing = next(
            (c for c in contributors if c.get('login', '').lower() == username.lower()),
            None,
        )

        if existing is not None:
            merged_types = sorted(set(existing.get('contributions', [])) | set(types))
            existing['contributions'] = merged_types
            print(f"[{project}] Updated existing contributor {username} with types: {merged_types}")
        else:
            contributors.append({
                'login': username,
                'name': name,
                'avatar_url': avatar_url,
                'profile': profile,
                'contributions': types,
            })
            print(f"[{project}] Added new contributor {username} with types: {types}")

        config['contributors'] = contributors
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=4)
            f.write('\n')
        print(f"Updated {config_path}")
        updated_paths.append(config_path)

    with open(os.environ['GITHUB_OUTPUT'], 'a', encoding='utf-8') as f:
        f.write(f"updated_configs={json.dumps(updated_paths)}\n")
    EOF
- name: Generate README tables
  if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
  env:
    # Handed over via the environment instead of being pasted into the shell script.
    SELECTED_PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
  run: |
    scripts_dir="$GITHUB_WORKSPACE/.github/workflows/contributors"

    # One project name per line, read into an array so names are never word-split.
    mapfile -t selected_projects < <(
      python3 -c 'import json, os; print("\n".join(json.loads(os.environ["SELECTED_PROJECTS_JSON"])))'
    )

    # Generate tables for each project we updated
    for project in "${selected_projects[@]}"; do
      [ -n "$project" ] || continue
      python3 "$scripts_dir/generate_readme_tables.py" --project "$project" --update
    done

    # Also regenerate the main README
    python3 "$scripts_dir/generate_main_readme.py"
# Sets the commit identity used by the commit step that follows.
- name: Configure Git
if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
- name: Commit and push changes
  if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
  env:
    # Handed over via the environment instead of being pasted into the shell script.
    SELECTED_PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
    CONTRIBUTOR_LOGIN: ${{ steps.parse.outputs.username }}
    CONTRIBUTION_TYPES_JSON: ${{ steps.parse.outputs.types }}
    PUSH_BRANCH: ${{ env.TARGET_BRANCH }}
  run: |
    # Print the JSON array stored in env var $1, joined with separator $2.
    join_json_env() {
      python3 -c 'import json, os, sys; print(sys.argv[2].join(json.loads(os.environ[sys.argv[1]])))' "$1" "$2"
    }

    # Stage a path only if it exists. Passing a missing path to a multi-path
    # "git add" makes the whole command fail and stages nothing, which previously
    # dropped the config change for any project without a README.md.
    stage_if_present() {
      if [ -e "$1" ]; then
        git add -- "$1"
      fi
    }

    TYPES=$(join_json_env CONTRIBUTION_TYPES_JSON ', ')
    PROJECTS_LIST=$(join_json_env SELECTED_PROJECTS_JSON ', ')
    mapfile -t selected_projects < <(join_json_env SELECTED_PROJECTS_JSON $'\n')

    # Stage contributor files for each project
    for project in "${selected_projects[@]}"; do
      [ -n "$project" ] || continue
      stage_if_present "${project}/.all-contributorsrc"
      stage_if_present "${project}/README.md"
    done
    stage_if_present README.md

    if git diff --staged --quiet; then
      echo "No changes to commit"
    else
      git commit -m "docs: add @${CONTRIBUTOR_LOGIN} as contributor for ${TYPES} (${PROJECTS_LIST})"
      # Rebase onto the latest remote state so the push is a fast-forward.
      git pull --rebase origin "$PUSH_BRANCH"
      git push origin "$PUSH_BRANCH"
      echo "Changes committed and pushed!"
    fi
# =====================================================================
# STEP 5: Post success comment
# =====================================================================
- name: React to comment
  if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'true'
  uses: actions/github-script@v8
  env:
    # The trigger line is raw commenter text. Pasting it into a JS template literal
    # breaks on a backtick and evaluates any dollar-brace expression it contains, so
    # every value is read from the environment instead.
    CONTRIBUTOR_LOGIN: ${{ steps.parse.outputs.username }}
    SELECTED_PROJECTS_JSON: ${{ steps.parse.outputs.projects }}
    PROJECT_SOURCE: ${{ steps.parse.outputs.project_source }}
    CONTRIBUTION_TYPES_JSON: ${{ steps.parse.outputs.types }}
    TRIGGER_LINE: ${{ steps.extract.outputs.trigger_line }}
  with:
    script: |
      // Acknowledges the request with a +1 reaction, then posts a summary comment
      // naming the contributor, the recognised types, the projects, and the files
      // that were updated.
      await github.rest.reactions.createForIssueComment({
        owner: context.repo.owner,
        repo: context.repo.repo,
        comment_id: context.payload.comment.id,
        content: '+1'
      });

      const username = process.env.CONTRIBUTOR_LOGIN || '';
      const projects = JSON.parse(process.env.SELECTED_PROJECTS_JSON || '[]');
      const projectSource = process.env.PROJECT_SOURCE || '';
      const types = JSON.parse(process.env.CONTRIBUTION_TYPES_JSON || '[]');
      const triggerLine = process.env.TRIGGER_LINE || '';

      // Human-readable explanation of how the project was determined.
      const sourceLabels = {
        comment: 'explicitly mentioned in comment',
        pr_files: 'detected from PR changed files',
        issue_context: 'detected from issue labels/title'
      };
      const sourceNote = sourceLabels[projectSource] || projectSource;
      const projectList = projects.join(', ');
      const filesList = projects
        .map((p) => `- \`${p}/.all-contributorsrc\`, \`${p}/README.md\``)
        .join('\n');

      const body = [
        `I've added @${username} as a contributor to **${projectList}**! :tada:`,
        "",
        `**Recognized for:** ${types.join(', ')}`,
        `**Project(s):** ${projectList} (${sourceNote})`,
        `**Based on:** ${triggerLine}`,
        "",
        "The contributor list has been updated in:",
        filesList,
        "- Main `README.md`",
        "",
        "We love recognizing our contributors! :heart:"
      ].join('\n');

      await github.rest.issues.createComment({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number,
        body
      });
# =====================================================================
# STEP 6: Handle failures — ask user when project is unknown
# =====================================================================
- name: Handle parsing failure
  if: steps.extract.outputs.should_run == 'true' && steps.parse.outputs.success == 'false'
  uses: actions/github-script@v8
  env:
    PROJECTS: ${{ env.PROJECTS }}
    CONTRIBUTION_TYPES: ${{ env.CONTRIBUTION_TYPES }}
    # The trigger line is raw commenter text. Pasting it into a JS template literal
    # breaks on a backtick and evaluates any dollar-brace expression it contains, so
    # every value is read from the environment instead.
    PARSE_ERROR: ${{ steps.parse.outputs.error }}
    TRIGGER_LINE: ${{ steps.extract.outputs.trigger_line }}
    PROJECT_SOURCE: ${{ steps.parse.outputs.project_source }}
    CONTRIBUTOR_LOGIN: ${{ steps.parse.outputs.username }}
    CONTRIBUTION_TYPES_JSON: ${{ steps.parse.outputs.types }}
  with:
    script: |
      // Posts a help comment explaining why the request could not be processed.
      // error is one of: no_project (ask which project), no_username, no_types.
      const error = process.env.PARSE_ERROR || '';
      const triggerLine = process.env.TRIGGER_LINE || '';
      const projects = process.env.PROJECTS.split(',');
      const validTypes = (process.env.CONTRIBUTION_TYPES || 'bug,code,doc,design,ideas,review,test,tool').split(',');
      const projectSource = process.env.PROJECT_SOURCE || '';
      const username = process.env.CONTRIBUTOR_LOGIN || '';

      let types = [];
      try {
        const parsedTypes = JSON.parse(process.env.CONTRIBUTION_TYPES_JSON || '[]');
        if (Array.isArray(parsedTypes)) types = parsedTypes;
      } catch (e) {
        // The types output is absent or malformed on some failure paths; treat as none.
        types = [];
      }

      let body;
      if (error === 'no_project') {
        // === PROJECT UNKNOWN — ask the user ===
        const userPart = username ? ` @${username}` : '';
        const typesPart = types.length > 0 ? ` for ${types.join(', ')}` : ' for code';
        // Builds one copy-pasteable example command ending in the given project list.
        const exampleCommand = (projectSuffix) =>
          "- `@all-contributors" + userPart + typesPart + projectSuffix + "`";

        if (projectSource === 'ambiguous') {
          body = [
            "This PR touches files in **multiple projects**, so I need you to tell me which one(s). :thinking:",
            "",
            `I detected${userPart}${typesPart}, but which project(s) should I add them to?`,
            "",
            "You can specify **one or more** projects in your reply, e.g.:",
            exampleCommand(" in tinytorch"),
            exampleCommand(" in tinytorch, book, kits"),
            "",
            "Options: " + projects.map((p) => `\`${p}\``).join(', '),
          ].join('\n');
        } else {
          body = [
            `I couldn't determine which project(s) to add the contributor to. :thinking:`,
            "",
            "**Your comment:** " + triggerLine,
            "",
            "This repo has multiple projects. Specify one or more explicitly, e.g.:",
            exampleCommand(" in tinytorch"),
            exampleCommand(" in TinyTorch, Book, Kits"),
            "",
            "**How project detection works:**",
            "- **In comment:** Say \"in TinyTorch\", \"for book, labs\", etc. (multiple projects OK)",
            "- **On PRs:** Auto-detected from changed file paths when only one project is touched",
            "- **On issues:** From labels or title, or specify in the comment",
          ].join('\n');
        }
      } else {
        // === Other errors (no_username, no_types) ===
        const errorMessages = {
          no_username: "I couldn't find a GitHub username in that comment.",
          no_types: "I couldn't determine the contribution type.",
        };
        const errorMsg = errorMessages[error] || "I couldn't parse that comment.";
        body = [
          errorMsg + " :thinking:",
          "",
          "**Your comment:** " + triggerLine,
          "",
          "**Example formats that work:**",
          "```",
          "@all-contributors @jane-doe fixed typos in the documentation",
          "@all-contributors please add @john_smith for Doc in TinyTorch",
          "@all-contributors @user123 for code, doc in tinytorch, book",
          "@all-contributors @dev42 implemented the new caching feature in tinytorch",
          "```",
          "",
          // Listed from configuration so the help text cannot drift from the allow-list.
          "**Contribution types:** " + validTypes.join(', '),
          "",
          `**Projects (one or more):** ${projects.join(', ')} — specify in comment (e.g. "in TinyTorch, Book") or auto-detected from PR file paths.`
        ].join('\n');
      }

      await github.rest.issues.createComment({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number,
        body
      });