mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-05 17:18:48 -05:00
Latest commit: Enhanced AI prompt to filter out internal infrastructure changes
- Focus on educational improvements that benefit readers and instructors
- Skip entries with only section IDs, formatting, or build system changes
- Prioritize content additions, learning enhancements, and clarity improvements
- Updated changelog with user-focused descriptions since August 6th

🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
935 lines
40 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Generate changelog entries and release notes using AI analysis.

This script analyzes git commits to generate:
1. Changelog entries for the CHANGELOG.md file
2. Release notes for GitHub releases

Features:
- AI-powered commit analysis using Ollama
- Categorization of changes (features, fixes, docs, etc.)
- Impact assessment and importance ranking
- Customizable AI models
- Support for both changelog and release notes modes

Usage:
    # Generate a changelog entry since the last publish
    python generate_release_content.py --update

    # Regenerate the full changelog from scratch
    python generate_release_content.py --full

    # Generate release notes
    python generate_release_content.py --release-notes --version v1.2.0 --previous-version v1.1.0 --description "New features"
"""

import subprocess
import re
import os
import argparse
import yaml
import time
import requests

from collections import defaultdict
from datetime import datetime

# Initialize Ollama as default
use_ollama = True  # Global flag to track which service to use

CHANGELOG_FILE = "CHANGELOG.md"
QUARTO_YML_FILE = "quarto/config/_quarto-pdf.yml"  # Default to PDF config, which has the chapters structure
GITHUB_REPO_URL = "https://github.com/harvard-edge/cs249r_book/"
# Removed MAJOR_CHANGE_THRESHOLD since we're organizing by content type now
OPENAI_DELAY = 1  # seconds between OpenAI API calls
OLLAMA_DELAY = 0.5  # seconds between Ollama calls (faster since local)
OLLAMA_URL = "http://localhost:11434/api/generate"  # Default Ollama API endpoint

chapter_order = []

# Lookup table matching the repository's actual file structure
chapter_lookup = [
    # MAIN chapters
    ("contents/core/introduction/introduction.qmd", "Introduction", 1),
    ("contents/core/ml_systems/ml_systems.qmd", "ML Systems", 2),
    ("contents/core/dl_primer/dl_primer.qmd", "DL Primer", 3),
    ("contents/core/dnn_architectures/dnn_architectures.qmd", "DNN Architectures", 4),
    ("contents/core/workflow/workflow.qmd", "AI Workflow", 5),
    ("contents/core/data_engineering/data_engineering.qmd", "Data Engineering", 6),
    ("contents/core/frameworks/frameworks.qmd", "AI Frameworks", 7),
    ("contents/core/training/training.qmd", "AI Training", 8),
    ("contents/core/efficient_ai/efficient_ai.qmd", "Efficient AI", 9),
    ("contents/core/optimizations/optimizations.qmd", "Model Optimizations", 10),
    ("contents/core/hw_acceleration/hw_acceleration.qmd", "AI Acceleration", 11),
    ("contents/core/benchmarking/benchmarking.qmd", "Benchmarking AI", 12),
    ("contents/core/ops/ops.qmd", "ML Operations", 13),
    ("contents/core/ondevice_learning/ondevice_learning.qmd", "On-Device Learning", 14),
    ("contents/core/privacy_security/privacy_security.qmd", "Security & Privacy", 15),
    ("contents/core/responsible_ai/responsible_ai.qmd", "Responsible AI", 16),
    ("contents/core/sustainable_ai/sustainable_ai.qmd", "Sustainable AI", 17),
    ("contents/core/robust_ai/robust_ai.qmd", "Robust AI", 18),
    ("contents/core/ai_for_good/ai_for_good.qmd", "AI for Good", 19),
    ("contents/core/conclusion/conclusion.qmd", "Conclusion", 20),

    # LAB sections
    ("contents/labs/overview.qmd", "Labs Overview", 100),
    ("contents/labs/getting_started.qmd", "Lab Setup", 101),

    # Arduino Nicla Vision Labs
    ("contents/labs/arduino/nicla_vision/setup/setup.qmd", "Arduino Setup", 102),
    ("contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd", "Arduino Image Classification", 103),
    ("contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd", "Arduino Object Detection", 104),
    ("contents/labs/arduino/nicla_vision/kws/kws.qmd", "Arduino Keyword Spotting", 105),
    ("contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd", "Arduino Motion Classification", 106),

    # Seeed XIAO ESP32S3 Labs
    ("contents/labs/seeed/xiao_esp32s3/setup/setup.qmd", "XIAO Setup", 107),
    ("contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd", "XIAO Image Classification", 108),
    ("contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd", "XIAO Object Detection", 109),
    ("contents/labs/seeed/xiao_esp32s3/kws/kws.qmd", "XIAO Keyword Spotting", 110),
    ("contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd", "XIAO Motion Classification", 111),

    # Raspberry Pi Labs
    ("contents/labs/raspi/setup/setup.qmd", "Raspberry Pi Setup", 112),
    ("contents/labs/raspi/image_classification/image_classification.qmd", "Pi Image Classification", 113),
    ("contents/labs/raspi/object_detection/object_detection.qmd", "Pi Object Detection", 114),
    ("contents/labs/raspi/llm/llm.qmd", "Pi Large Language Models", 115),
    ("contents/labs/raspi/vlm/vlm.qmd", "Pi Vision Language Models", 116),

    # Frontmatter
    ("contents/frontmatter/foreword.qmd", "Foreword", 200),
    ("contents/frontmatter/about/about.qmd", "About", 201),
    ("contents/frontmatter/changelog/changelog.qmd", "Changelog", 202),
    ("contents/frontmatter/acknowledgements/acknowledgements.qmd", "Acknowledgements", 203),
    ("contents/frontmatter/socratiq/socratiq.qmd", "SocratiQ", 204),

    # Appendix
    ("contents/appendix/phd_survival_guide.qmd", "PhD Survival Guide", 300),
]

def load_chapter_order(quarto_file=None):
    """Populate the global chapter_order list from a Quarto config file."""
    global chapter_order
    config_file = quarto_file or QUARTO_YML_FILE
    with open(config_file, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    def find_chapters(obj):
        """Recursively search the parsed YAML for a "chapters" key."""
        if isinstance(obj, dict):
            for key, value in obj.items():
                if key == "chapters":
                    return value
                result = find_chapters(value)
                if result:
                    return result
        elif isinstance(obj, list):
            for item in obj:
                result = find_chapters(item)
                if result:
                    return result
        return None

    def extract_qmd_paths(items):
        """Flatten the chapters tree into an ordered list of .qmd paths."""
        paths = []
        for item in items:
            if isinstance(item, str) and item.endswith(".qmd"):
                paths.append(item)
            elif isinstance(item, dict):
                if "chapters" in item:
                    paths.extend(extract_qmd_paths(item["chapters"]))
                elif "part" in item and isinstance(item["part"], str):
                    if item["part"].endswith(".qmd"):
                        paths.append(item["part"])
                    if "chapters" in item:
                        paths.extend(extract_qmd_paths(item["chapters"]))
        return paths

    chapters_section = find_chapters(data)
    chapter_order = extract_qmd_paths(chapters_section) if chapters_section else []

    print(f"📚 Loaded {len(chapter_order)} chapters from {config_file}")

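# A minimal sketch of the Quarto config shape that load_chapter_order expects
# (illustrative paths only, not the actual config contents):
#
#   book:
#     chapters:
#       - contents/core/introduction/introduction.qmd
#       - part: contents/core/ml_systems/ml_systems.qmd
#         chapters:
#           - contents/core/dl_primer/dl_primer.qmd
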
def run_git_command(cmd, verbose=False, retries=3):
    """Run a git command, retrying on failure; return stdout stripped."""
    for attempt in range(retries):
        if verbose:
            print(f"📦 Running: {' '.join(cmd)} (attempt {attempt + 1})")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            return result.stdout.strip()

        if attempt < retries - 1:
            print(f"⚠️ Git command failed, retrying in 2s: {result.stderr}")
            time.sleep(2)
        else:
            raise RuntimeError(f"Git command failed after {retries} attempts: {' '.join(cmd)}\n{result.stderr}")

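# Usage sketch (assumes a git checkout with the relevant remotes configured):
#   head = run_git_command(["git", "rev-parse", "HEAD"], verbose=True)
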
def extract_chapter_title(file_path):
    """Map a .qmd file path to its display title.

    Lookup numbers encode the section: 1-20 main chapters, 100-199 labs,
    200-299 frontmatter, 300+ appendix.
    """
    # Try exact path match first
    for fname, title, number in chapter_lookup:
        if fname == file_path:
            if number <= 20:
                return f"Chapter {number}: {title}"
            elif number <= 199:
                return f"Lab: {title}"
            else:
                return title  # Frontmatter and appendix - just use the title

    # Fallback: try basename matching for backwards compatibility
    base = os.path.basename(file_path)
    for fname, title, number in chapter_lookup:
        if os.path.basename(fname) == base:
            if number <= 20:
                return f"Chapter {number}: {title}"
            elif number <= 199:
                return f"Lab: {title}"
            else:
                return title

    # Final fallback: generate a title from the path
    clean = base.replace('_', ' ').replace('.qmd', '').title()
    if "contents/core/" in file_path:
        return f"Chapter: {clean}"
    elif "contents/labs/" in file_path:
        return f"Lab: {clean}"
    return clean

def sort_by_impact_level(updates):
    """Sort changelog lines so entries with more filled impact bars come first."""
    def extract_impact_level(update):
        # Extract impact bars (e.g. `███░░`) from the start of each update
        match = re.search(r'`([█░]+)`', update)
        if match:
            bars = match.group(1)
            # Count filled bars (█) - higher count = higher importance
            filled_count = bars.count('█')
            return -filled_count  # Negative for descending order (most important first)
        return 0  # Default for entries without impact bars

    return sorted(updates, key=extract_impact_level)

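# Example with hypothetical entries - the ████░ line sorts ahead of the ██░░░ line:
#   sort_by_impact_level([
#       "- `██░░░` **About**: Minor fix",
#       "- `████░` **Chapter 3: DL Primer**: New diagrams",
#   ])
#   # -> ["- `████░` **Chapter 3: DL Primer**: New diagrams",
#   #     "- `██░░░` **About**: Minor fix"]
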
def get_changes_in_dev_since(date_start, date_end=None, verbose=False):
    """Return `git log --numstat` output for .qmd changes on origin/dev."""
    cmd = ["git", "log", "--numstat", "--since", date_start]
    if date_end:
        cmd += ["--until", date_end]
    cmd += ["origin/dev", "--", "quarto/contents/**/*.qmd"]
    return run_git_command(cmd, verbose=verbose)

def get_commit_messages_for_file(file_path, since, until=None, verbose=False):
    """Return the commit subjects touching file_path in the given window."""
    cmd = ["git", "log", "--pretty=format:%s", "--since", since]
    if until:
        cmd += ["--until", until]
    cmd += ["origin/dev", "--", file_path]
    messages = run_git_command(cmd, verbose=verbose)

    # Return all non-empty commit messages - let the AI determine importance
    meaningful_messages = [message.strip() for message in messages.splitlines() if message.strip()]
    return "\n".join(meaningful_messages)

def call_ollama(prompt, model="llama3.1:8b", verbose=False):
    """Call the Ollama API for text generation."""
    try:
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": 0.2,
                "num_predict": 100
            }
        }

        if verbose:
            print(f"🤖 Calling Ollama with model: {model}")

        response = requests.post(OLLAMA_URL, json=payload, timeout=30)
        response.raise_for_status()

        result = response.json()
        return result.get("response", "").strip()

    except requests.exceptions.RequestException as e:
        print(f"⚠️ Ollama API error: {e}")
        return None
    except Exception as e:
        print(f"⚠️ Ollama error: {e}")
        return None

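# Usage sketch (assumes an Ollama server on localhost:11434 with the model pulled):
#   text = call_ollama("Summarize: fixed typos in intro", model="gemma2:9b")
#   if text is None:
#       ...  # server unreachable or model missing; caller falls back or retries
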
def summarize_changes_with_openai(file_path, commit_messages, verbose=False, max_retries=3, use_ollama=False, ollama_model="llama3.1:8b"):
    """Summarize a file's commit messages into one changelog line via Ollama or OpenAI."""
    chapter_title = extract_chapter_title(file_path)
    if verbose:
        print(f"🤖 Calling {'Ollama' if use_ollama else 'OpenAI'} for: {file_path} -- {chapter_title}")

    prompt = f"""You're writing a changelog entry for a machine learning textbook. Focus ONLY on changes that benefit readers, students, and instructors.

File: {file_path}
Chapter: {chapter_title}

Commit messages:
{commit_messages}

IGNORE these internal/infrastructure changes (don't mention them):
- Section IDs, headers, navigation improvements
- CLI help, script naming, formatting updates
- File reorganization, standardization, cross-references
- Table formatting, image filename changes
- Markdown rendering, build system improvements
- Script standardization, expert feedback incorporation

FOCUS ON these user-facing improvements:
- New content: concepts, examples, explanations, figures, diagrams
- Enhanced learning: quizzes, exercises, decision frameworks
- Improved clarity: better explanations, clearer writing, simplified concepts
- New features: tools, interactive elements, practical guides
- Content fixes: corrected equations, updated information, bug fixes
- Educational improvements: better flow, pedagogical enhancements

Only create an entry if there are meaningful user-facing changes. If changes are purely internal (section IDs, formatting, standardization, etc.), respond with "SKIP: Internal changes only".

The output format will be: `[IMPACT]` **{chapter_title}**: [YOUR SUMMARY]

Rate importance based on educational impact:
- █████ Major: New chapters, major concepts, significant educational content (rare)
- ████░ Large: Multiple new examples, substantial learning improvements (uncommon)
- ███░░ Medium: New examples, clarifications, moderate content additions (common)
- ██░░░ Small: Minor content fixes, single example additions (most common)
- █░░░░ Tiny: Small corrections, minor improvements (rare)

Format your response exactly like this:
CHANGES: [list 2-3 main USER-FACING changes from commits]
SUMMARY: [what changed that users care about - NO chapter name, just the changes]
IMPACT: [█████, ████░, ███░░, ██░░░, or █░░░░]

Example:
CHANGES: Added transformer architecture section, New attention mechanism diagrams, Fixed backprop equations
SUMMARY: Added transformer architecture section with attention mechanism diagrams and corrected backpropagation equations
IMPACT: ████░

GOOD examples:
- "Added lottery ticket hypothesis section with pruning examples"
- "New GPU memory optimization diagrams and CUDA code samples"
- "Expanded federated learning coverage with privacy-preserving techniques"
- "Fixed mathematical notation in backpropagation equations"
- "Added decision framework quiz for architecture selection"

Focus on WHAT was added/changed that improves learning, not internal infrastructure changes."""

    for attempt in range(max_retries):
        try:
            # Add delay only for OpenAI (rate limiting)
            if not use_ollama and attempt > 0:
                time.sleep(OPENAI_DELAY * (2 ** attempt))  # exponential backoff

            if use_ollama:
                summary = call_ollama(prompt, model=ollama_model, verbose=verbose)
                if summary is None:
                    raise Exception("Ollama call failed")
            else:
                # OpenAI path: create the client lazily here (assumes the
                # openai>=1.0 package is installed and OPENAI_API_KEY is set
                # in the environment; `client` was previously undefined).
                from openai import OpenAI
                client = OpenAI()
                response = client.chat.completions.create(
                    model="gpt-4",
                    temperature=0.2,  # Lower temperature for more consistent output
                    max_tokens=100,  # Limit length for conciseness
                    messages=[
                        {"role": "system", "content": "You are a technical writer creating concise changelog entries. Be specific and avoid generic language."},
                        {"role": "user", "content": prompt}
                    ]
                )
                summary = response.choices[0].message.content.strip()

            if not summary:
                return f"- **{chapter_title}**: _(no meaningful changes detected)_"

            # Check if the AI decided to skip this entry due to internal-only changes
            if "SKIP:" in summary or "Internal changes only" in summary:
                if verbose:
                    print(f" ⏭️ Skipping {file_path} - internal changes only")
                return None

            # Parse the structured CHANGES/SUMMARY/IMPACT format
            summary_match = re.search(r'SUMMARY:\s*(.+?)(?:\nIMPACT:|$)', summary, re.DOTALL)
            impact_match = re.search(r'IMPACT:\s*([█░]+)', summary)

            if summary_match:
                parsed_summary = summary_match.group(1).strip()
                # Remove any trailing punctuation
                if parsed_summary.endswith("."):
                    parsed_summary = parsed_summary[:-1]

                impact_bars = impact_match.group(1) if impact_match else "███░░"  # default medium

                # Add delay only for OpenAI after a successful call
                if not use_ollama:
                    time.sleep(OPENAI_DELAY)
                return f"- `{impact_bars}` **{chapter_title}**: {parsed_summary}"
            else:
                # Fallback to the old format if parsing fails
                summary = summary.replace("--- --- --- ---", "").strip()
                if summary.endswith("."):
                    summary = summary[:-1]

                if not use_ollama:
                    time.sleep(OPENAI_DELAY)
                return f"- **{chapter_title}**: {summary}"

        except Exception as e:
            print(f"⚠️ {'Ollama' if use_ollama else 'OpenAI'} attempt {attempt + 1} failed for {file_path}: {e}")
            if attempt == max_retries - 1:
                return f"- **{chapter_title}**: _(unable to summarize; see commits manually)_"

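# Sketch of a well-formed model reply and the changelog line it becomes
# (illustrative only; the chapter title comes from chapter_lookup):
#   CHANGES: Added transformer section, New attention diagrams
#   SUMMARY: Added transformer architecture section with attention diagrams
#   IMPACT: ████░
# -> "- `████░` **Chapter 4: DNN Architectures**: Added transformer architecture section with attention diagrams"
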
def format_friendly_date(date_str):
    """Format an ISO or space-separated git date as e.g. "August 01 at 04:54 PM"."""
    try:
        # Try ISO format first (with T separator)
        if 'T' in date_str:
            dt = datetime.fromisoformat(date_str)
        else:
            # Fallback to space-separated format
            dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z")
        # Format with full month name and time
        return dt.strftime("%B %d at %I:%M %p")
    except (ValueError, TypeError):
        return date_str


def normalized_path(path):
    """Normalize a path for case-insensitive suffix comparisons."""
    return os.path.normpath(path).lower()

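# Examples with hypothetical timestamps:
#   format_friendly_date("2025-08-06T16:54:00-04:00")  # -> "August 06 at 04:54 PM"
#   format_friendly_date("not-a-date")                 # -> "not-a-date" (returned unchanged)
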
def generate_entry(start_date, end_date=None, verbose=False, is_latest=False):
    """Build one changelog entry covering start_date..end_date (is_latest is currently unused)."""
    if verbose:
        print(f"📁 Processing changes from {start_date} to {end_date or 'now'}")
    print("🔍 Analyzing Git changes...")
    changes = get_changes_in_dev_since(start_date, end_date, verbose=verbose)
    if not changes.strip():
        print(" ⚠️ No changes found in specified period")
        return None

    print("📊 Categorizing changes by file...")
    changes_by_file = defaultdict(lambda: [0, 0])
    for line in changes.splitlines():
        parts = line.split("\t")
        if len(parts) != 3:
            continue
        added, removed, file_path = parts
        added = int(added) if added.isdigit() else 0
        removed = int(removed) if removed.isdigit() else 0
        changes_by_file[file_path][0] += added
        changes_by_file[file_path][1] += removed

    current_date = datetime.now().strftime('%B %d at %I:%M %p') if not end_date else format_friendly_date(end_date)
    entry = f"### 📅 {current_date}\n\n"

    frontmatter, chapters, labs, appendix = [], [], [], []

    ordered_files = sorted(
        changes_by_file,
        key=lambda f: next(
            (i for i, ch in enumerate(chapter_order) if normalized_path(f).endswith(normalized_path(ch))),
            float('inf')
        )
    )

    total_files = len(ordered_files)
    print(f"📝 Processing {total_files} changed files...")

    for idx, file_path in enumerate(ordered_files, 1):
        added, removed = changes_by_file[file_path]
        if verbose:
            print(f"🔍 Summarizing {file_path} ({added}+ / {removed}-) [{idx}/{total_files}]")
        else:
            print(f" 📄 [{idx}/{total_files}] {os.path.basename(file_path)} ({added}+ {removed}-)")

        # Skip references
        if "references.qmd" in file_path:
            continue

        commit_msgs = get_commit_messages_for_file(file_path, start_date, end_date, verbose=verbose)

        # Skip if no meaningful commits
        if not commit_msgs.strip():
            if verbose:
                print(f"⏭️ Skipping {file_path} - no meaningful changes")
            continue

        print(" 🤖 Generating summary...")
        # Note: args is parsed at module scope when this file runs as a script
        summary = summarize_changes_with_openai(file_path, commit_msgs, verbose=verbose, use_ollama=use_ollama, ollama_model=args.model)

        # Skip if the AI determined these are internal-only changes
        if summary is None:
            continue

        # Show the generated summary
        summary_text = summary.replace(f"- **{extract_chapter_title(file_path)}**: ", "")
        print(f" 📝 {summary_text}")

        # Categorize by content type
        if "contents/frontmatter/" in file_path:
            frontmatter.append(summary)
        elif "contents/labs/" in file_path:
            labs.append(summary)
        elif "contents/appendix/" in file_path:
            appendix.append(summary)
        else:
            chapters.append(summary)

    print("📋 Organizing into sections...")
    print(f" 📄 Frontmatter: {len(frontmatter)} entries")
    print(f" 📖 Chapters: {len(chapters)} entries")
    print(f" 🧑‍💻 Labs: {len(labs)} entries")
    print(f" 📚 Appendix: {len(appendix)} entries")

    # All entries are closed by default - let users choose what to explore
    details_state = ""

    # Add sections in order: Frontmatter, Chapters, Labs, Appendix
    if frontmatter:
        entry += f"<details {details_state}>\n<summary>**📄 Frontmatter**</summary>\n\n" + "\n".join(sort_by_impact_level(frontmatter)) + "\n\n</details>\n\n"
    if chapters:
        entry += f"<details {details_state}>\n<summary>**📖 Chapters**</summary>\n\n" + "\n".join(sort_by_impact_level(chapters)) + "\n\n</details>\n\n"
    if labs:
        entry += f"<details {details_state}>\n<summary>**🧑‍💻 Labs**</summary>\n\n" + "\n".join(sort_by_impact_level(labs)) + "\n\n</details>\n\n"
    if appendix:
        entry += f"<details {details_state}>\n<summary>**📚 Appendix**</summary>\n\n" + "\n".join(sort_by_impact_level(appendix)) + "\n\n</details>\n"

    # If no content sections were added, return None (empty entry)
    if not frontmatter and not chapters and not labs and not appendix:
        print(" ⚠️ No meaningful content changes found - skipping entry")
        return None

    print("✅ Entry generation complete")
    return entry

def generate_demo_entry():
    """Generate a demo changelog entry with sample data based on real repository files."""
    current_date = datetime.now().strftime('%B %d at %I:%M %p')
    current_year = datetime.now().year

    # Real file paths from the repository (the summaries below are illustrative samples)
    real_files = [
        "quarto/contents/frontmatter/about/about.qmd",
        "quarto/contents/frontmatter/acknowledgements/acknowledgements.qmd",
        "quarto/contents/core/dl_primer/dl_primer.qmd",
        "quarto/contents/core/workflow/workflow.qmd",
        "quarto/contents/core/training/training.qmd",
        "quarto/contents/core/introduction/introduction.qmd",
        "quarto/contents/core/benchmarking/benchmarking.qmd",
        "quarto/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd",
        "quarto/contents/labs/raspi/setup/setup.qmd",
        "quarto/contents/backmatter/resources/phd_survival_guide.qmd"
    ]

    # Try to get some real commit data for more realistic content
    try:
        # Get recent commit messages for some files
        recent_commits = run_git_command(["git", "log", "--oneline", "-5", "--", "quarto/contents/core/dl_primer/dl_primer.qmd"], verbose=False)
        if recent_commits:
            # Placeholder: real commit data could be woven in here
            pass
    except RuntimeError:
        pass

    # Realistic sample summaries based on actual files
    frontmatter_entries = [
        "**About**: Updated book description and target audience information",
        "**Acknowledgements**: Added new contributors and updated the contributor list"
    ]

    chapter_entries = [
        "**Chapter 3: DL Primer**: Added new diagrams explaining neural network architectures and improved explanations of backpropagation",
        "**Chapter 5: AI Workflow**: Enhanced the workflow diagram and added new examples for data preprocessing steps",
        "**Chapter 8: AI Training**: Updated training examples with new code snippets and improved explanations of gradient descent",
        "**Chapter 1: Introduction**: Fixed several typos and improved the introduction to machine learning concepts",
        "**Chapter 12: Benchmarking AI**: Added new benchmarking metrics and updated performance comparison tables"
    ]

    lab_entries = [
        "**Lab: Arduino Image Classification**: Updated the image classification code with improved accuracy and added new examples",
        "**Lab: Raspberry Pi Setup**: Fixed setup instructions and added troubleshooting section for common issues"
    ]

    appendix_entries = [
        "**PhD Survival Guide**: Added new resources for graduate students and updated links"
    ]

    # Add impact bars
    frontmatter_with_impact = [f"- `███░░` {entry}" for entry in frontmatter_entries[:1]] + [f"- `██░░░` {entry}" for entry in frontmatter_entries[1:]]
    chapters_with_impact = [f"- `████░` {entry}" for entry in chapter_entries[:1]] + [f"- `███░░` {entry}" for entry in chapter_entries[1:3]] + [f"- `██░░░` {entry}" for entry in chapter_entries[3:]]
    labs_with_impact = [f"- `███░░` {entry}" for entry in lab_entries[:1]] + [f"- `██░░░` {entry}" for entry in lab_entries[1:]]
    appendix_with_impact = [f"- `█░░░░` {entry}" for entry in appendix_entries]

    demo_entry = f"""## {current_year} Updates

### 📅 {current_date}

<details>
<summary>**📄 Frontmatter**</summary>

{chr(10).join(frontmatter_with_impact)}

</details>

<details>
<summary>**📖 Chapters**</summary>

{chr(10).join(chapters_with_impact)}

</details>

<details>
<summary>**🧑‍💻 Labs**</summary>

{chr(10).join(labs_with_impact)}

</details>

<details>
<summary>**📚 Appendix**</summary>

{chr(10).join(appendix_with_impact)}

</details>
"""
    return demo_entry

def generate_release_notes_for_version(version, previous_version, description, verbose=False):
    """Generate release notes for a version using the same AI analysis as the changelog."""

    print(f"📝 Generating release notes for {version}...")
    print(f"📋 Description: {description}")
    print(f"🔄 Previous version: {previous_version}")

    # Get the latest gh-pages commit date as the "since" date
    latest_commit, latest_date = get_latest_gh_pages_commit(verbose=verbose)

    if not latest_date:
        print("❌ No previous release found!")
        return None

    print(f"📅 Analyzing changes since: {format_friendly_date(latest_date)}")

    # Reuse the AI-powered changelog analysis
    entry = generate_entry(latest_date, verbose=verbose, is_latest=True)

    if not entry:
        print("⚠️ No meaningful changes found")
        return None

    # Format as release notes instead of a changelog entry
    release_notes = f"""## 📚 Release {version}

**{description}**

### 📋 Release Information
- **Type**: Release
- **Previous Version**: {previous_version}
- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **Changes**: Since {format_friendly_date(latest_date)}

### 📝 What's New

{entry}

### 🔗 Quick Links
- 🌐 [Read Online](https://mlsysbook.ai)
- 📄 [Download PDF](https://mlsysbook.ai/pdf)
- 🧪 [Labs & Exercises](https://mlsysbook.ai/labs)
- 📚 [GitHub Repository](https://github.com/harvard-edge/cs249r_book)

### 📊 Technical Details
- **Build System**: Quarto with custom extensions
- **Deployment**: GitHub Pages + Netlify
- **PDF Generation**: LaTeX with compression
- **Content**: Markdown with interactive elements

---
*Generated with AI analysis of changes since last release*
"""

    print("✅ Release notes generated successfully")
    return release_notes

def fold_existing_entries(content):
    """Fold all existing details sections in the changelog content."""
    # The target is a literal string, so a plain replace is sufficient
    return content.replace('<details open>', '<details>')

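# Example: fold_existing_entries("<details open>\n...\n</details>")
#   -> "<details>\n...\n</details>"
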

def get_latest_gh_pages_commit(verbose=False):
    """Return (hash, ISO date) of the most recent publication commit on gh-pages.

    Defined at module level so both generate_changelog and
    generate_release_notes_for_version can call it.
    """
    print("🔍 Looking for latest publication commit...")
    # Look for actual publication commits, not administrative ones
    output = run_git_command(["git", "log", "--pretty=format:%H %aI", "--grep=Built site for gh-pages", "origin/gh-pages"], verbose=verbose)
    if output.strip():
        first_line = output.split('\n')[0]
        parts = first_line.split(" ", 1)
        result = (parts[0], parts[1]) if len(parts) == 2 else (None, None)
        if result[0]:
            print(f" 📅 Found latest commit: {result[0][:8]} from {result[1]}")
            return result
    print(" ⚠️ No publication commits found")
    return (None, None)


def get_all_gh_pages_commits(verbose=False):
    """Return (hash, ISO date) pairs for all publication commits on gh-pages."""
    print("🔍 Scanning all publication commits...")
    # Look for actual publication commits, not administrative ones
    output = run_git_command(["git", "log", "--pretty=format:%H %aI", "--grep=Built site for gh-pages", "origin/gh-pages"], verbose=verbose)
    commits = []
    for line in output.splitlines():
        parts = line.split(" ", 1)
        if len(parts) == 2:
            commits.append((parts[0], parts[1]))
    print(f" 📊 Found {len(commits)} publication commits")
    return commits


def extract_year_from_date(date_str):
    """Extract the year from a git date string, defaulting to the current year."""
    try:
        return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z").year
    except (ValueError, TypeError):
        return datetime.now().year


def generate_changelog(mode="update", verbose=False):
    """Generate changelog content: "full" rebuilds everything, "update" adds new entries."""
    print("🔄 Starting Git data fetch...")
    print(" 📦 Fetching gh-pages branch...")
    run_git_command(["git", "fetch", "origin", "gh-pages:refs/remotes/origin/gh-pages"], verbose=verbose)
    print(" 📦 Fetching dev branch...")
    run_git_command(["git", "fetch", "origin", "dev:refs/remotes/origin/dev"], verbose=verbose)
    print("✅ Git data fetch complete")

    latest_commit, latest_date = get_latest_gh_pages_commit(verbose=verbose)

    if mode == "full":
        if verbose:
            print("🔁 Running full regeneration...")
        commits = get_all_gh_pages_commits(verbose=verbose)

        # Group commits by date (YYYY-MM-DD) to merge same-day publishes
        def extract_date_only(date_str):
            try:
                return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z").strftime("%Y-%m-%d")
            except (ValueError, TypeError):
                return date_str.split()[0]  # fallback to first part

        # Group commits by publication date
        commits_by_date = defaultdict(list)
        for commit, date in commits:
            date_key = extract_date_only(date)
            commits_by_date[date_key].append((commit, date))

        # Sort dates and get unique publication periods
        unique_dates = sorted(commits_by_date.keys(), reverse=True)  # newest first
        print(f"📊 Found {len(unique_dates)} unique publication dates...")

        # Group entries by year
        entries_by_year = defaultdict(list)

        for i in range(len(unique_dates) - 1):
            current_date_key = unique_dates[i]
            previous_date_key = unique_dates[i + 1]

            # Get the latest commit from the current date for the "published on" date
            current_commits = commits_by_date[current_date_key]
            latest_current = max(current_commits, key=lambda x: x[1])  # latest timestamp

            # Get the earliest commit from the previous date as the "since" date
            previous_commits = commits_by_date[previous_date_key]
            earliest_previous = min(previous_commits, key=lambda x: x[1])  # earliest timestamp

            current_date = latest_current[1]
            previous_date = earliest_previous[1]

            # Extract year from current_date (the publication date)
            pub_year = extract_year_from_date(current_date)

            print(f"📅 Processing period {i+1}/{len(unique_dates)-1}: {format_friendly_date(previous_date)} → {format_friendly_date(current_date)} [{pub_year}]")
            entry = generate_entry(previous_date, current_date, verbose=verbose, is_latest=(i == 0))
            if entry:
                entries_by_year[pub_year].append(entry)

        if not entries_by_year:
            return "_No updates found._"

        # Build output with year headers, newest years first
        output_sections = []
        for year in sorted(entries_by_year.keys(), reverse=True):
            year_header = f"## {year} Updates"
            year_entries = "\n\n".join(entries_by_year[year])
            output_sections.append(f"{year_header}\n\n{year_entries}")

        return "\n\n---\n\n".join(output_sections) + "\n"

    else:
        if verbose:
            print("⚡ Running update mode...")
        print(f"📅 Processing changes since: {format_friendly_date(latest_date) if latest_date else 'beginning'}")
        entry = generate_entry(latest_date, verbose=verbose, is_latest=True)
        if not entry:
            return "_No updates found._"

        current_year = datetime.now().year
        year_header = f"## {current_year} Updates"
        return f"{year_header}\n\n{entry}"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate changelog for ML systems book.")
    parser.add_argument("-f", "--full", action="store_true", help="Regenerate the entire changelog from scratch.")
    parser.add_argument("-u", "--update", action="store_true", help="Add new entries since last gh-pages publish.")
    parser.add_argument("-t", "--test", action="store_true", help="Run without writing to file.")
    parser.add_argument("--demo", action="store_true", help="Generate a demo changelog entry with sample data.")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
    parser.add_argument("-q", "--quarto-config", type=str, help="Path to quarto config file (default: quarto/config/_quarto-pdf.yml)")
    parser.add_argument("-m", "--model", type=str, default="gemma2:9b", help="Ollama model to use (default: gemma2:9b). Popular options: gemma2:9b, gemma2:27b, llama3.1:8b, llama3.1:70b")
    parser.add_argument("--release-notes", action="store_true", help="Generate release notes instead of changelog entry.")
    parser.add_argument("--version", type=str, help="Version for release notes (required with --release-notes).")
    parser.add_argument("--previous-version", type=str, help="Previous version for release notes (required with --release-notes).")
    parser.add_argument("--description", type=str, help="Release description (required with --release-notes).")

    args = parser.parse_args()

    # Handle demo mode first
    if args.demo:
        print("🎭 DEMO MODE - Generating sample changelog entry")
        demo_entry = generate_demo_entry()
        print("=" * 60)
        print("📝 DEMO CHANGELOG ENTRY")
        print("=" * 60)
        print(demo_entry)
        print("=" * 60)
        print("✅ Demo entry generated successfully!")
        exit(0)

    # Handle release notes mode
    if args.release_notes:
        if not args.version or not args.previous_version or not args.description:
            print("❌ Error: --release-notes requires --version, --previous-version, and --description")
            print("💡 Example: --release-notes --version v1.2.0 --previous-version v1.1.0 --description 'Add new chapter'")
            exit(1)

        print("📝 RELEASE NOTES MODE")
        mode = "release_notes"
    else:
        # Require exactly one of --full or --update
        if args.full and args.update:
            print("❌ Error: Cannot specify both --full and --update modes")
            exit(1)
        elif args.full:
            mode = "full"
        elif args.update:
            mode = "update"
        else:
            print("❌ Error: Must specify either --full, --update, or --release-notes mode")
            print("💡 Use --help for usage information")
            print("💡 Use --demo to see a sample changelog entry")
            exit(1)

    try:
        load_chapter_order(args.quarto_config)

        # Print configuration header
        print("=" * 60)
        print("📝 CHANGELOG GENERATION CONFIG")
        print("=" * 60)
        print(f"🎯 Mode: {mode.upper()}")
        print(f"🤖 AI Model: {args.model} (via Ollama)")
        print(f"🔧 Test Mode: {'ON' if args.test else 'OFF'}")
        print(f"📢 Verbose: {'ON' if args.verbose else 'OFF'}")
        print("📋 Features: Impact bars, importance sorting, specific summaries")
        print("=" * 60)
        print()

        print(f"🚀 Starting changelog generation in {mode} mode...")

        print(f"🤖 Using Ollama for summarization with model: {args.model}")
        use_ollama = True
        # Test the Ollama connection
        test_response = call_ollama("Hello", model=args.model, verbose=False)
        if test_response is None:
            print("❌ Failed to connect to Ollama. Make sure it's running on localhost:11434")
            print("💡 To install models in Ollama:")
            print("   ollama pull gemma2:9b")
            print("   ollama pull gemma2:27b")
            exit(1)
        print("✅ Ollama connection successful")

        if mode == "release_notes":
            # Generate release notes
            new_entry = generate_release_notes_for_version(
                args.version,
                args.previous_version,
                args.description,
                verbose=args.verbose
            )
        else:
            # Generate changelog entry
            new_entry = generate_changelog(mode=mode, verbose=args.verbose)

        # Guard: release-notes generation can return None when nothing changed
        if new_entry is None:
            print("⚠️ Nothing to write - no content generated")
            exit(0)

        if args.test:
            print("🧪 TEST OUTPUT ONLY:\n")
            print(new_entry)
        elif mode == "release_notes":
            # Save release notes to a file for the workflow to use
            release_notes_file = f"release_notes_{args.version}.md"
            with open(release_notes_file, "w", encoding="utf-8") as f:
                f.write(new_entry.strip() + "\n")
            print(f"\n✅ Release notes written to {release_notes_file}")
            print("📋 Next step: Use this file in your GitHub workflow")
        else:
            existing = ""
            if os.path.exists(CHANGELOG_FILE):
                with open(CHANGELOG_FILE, "r", encoding="utf-8") as f:
                    existing = f.read()

            current_year = datetime.now().year
            year_header = f"## {current_year} Updates"

            if mode == "full":
                # For full mode, replace the entire content (already includes year headers)
                updated_content = new_entry.strip()
            else:
                # For update mode, insert the new entry right after the year header
                existing_lines = existing.splitlines()
                new_lines = []
                inserted = False

                for line in existing_lines:
                    new_lines.append(line)
                    # Insert new entry right after the year header
                    if not inserted and line.strip() == year_header:
                        # Add the new entry (without year header since it's already in the file)
                        new_entry_lines = new_entry.strip().splitlines()
                        # Skip the first line (year header) since we're inserting after the existing one
                        if new_entry_lines and new_entry_lines[0].strip() == year_header:
                            new_entry_lines = new_entry_lines[1:]
                        new_lines.extend(new_entry_lines)
                        new_lines.append("")  # Add blank line
                        inserted = True

                if not inserted:
                    # If no year header found, prepend to the beginning
                    new_lines = new_entry.strip().splitlines() + [""] + existing_lines

                updated_content = "\n".join(new_lines)

            # Save the changelog
            with open(CHANGELOG_FILE, "w", encoding="utf-8") as f:
                f.write(updated_content.strip() + "\n")
            print(f"\n✅ Changelog written to {CHANGELOG_FILE}")

    except KeyboardInterrupt:
        print("\n⚠️ Process interrupted by user")
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()