Files
cs249r_book/tools/scripts/maintenance/generate_release_content.py
Vijay Janapa Reddi fd87823d4e enhance: improve changelog generation to focus on user-facing changes
- Enhanced AI prompt to filter out internal infrastructure changes
- Focus on educational improvements that benefit readers and instructors
- Skip entries with only section IDs, formatting, or build system changes
- Prioritize content additions, learning enhancements, and clarity improvements
- Updated changelog with user-focused descriptions since August 6th

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-09 18:18:16 -04:00

935 lines
40 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Generate changelog entries and release notes using AI analysis.
This script analyzes git commits to generate:
1. Changelog entries for the CHANGELOG.md file
2. Release notes for GitHub releases
Features:
- AI-powered commit analysis using Ollama
- Categorization of changes (features, fixes, docs, etc.)
- Impact assessment and importance ranking
- Customizable AI models
- Support for both changelog and release notes modes
Usage:
# Generate changelog entry
python generate_release_content.py --changelog
# Generate release notes
python generate_release_content.py --release-notes --version v1.2.0 --previous-version v1.1.0 --description "New features"
"""
import subprocess
import re
import os
import argparse
import yaml
import time
import requests
import json
from collections import defaultdict
from datetime import datetime
# Initialize Ollama as default
use_ollama = True # Global flag to track which service to use
CHANGELOG_FILE = "CHANGELOG.md"
QUARTO_YML_FILE = "quarto/config/_quarto-pdf.yml" # Default to PDF config which has chapters structure
GITHUB_REPO_URL = "https://github.com/harvard-edge/cs249r_book/"
# Removed MAJOR_CHANGE_THRESHOLD since we're organizing by content type now
OPENAI_DELAY = 1 # seconds between API calls
OLLAMA_DELAY = 0.5 # seconds between Ollama calls (faster since local)
OLLAMA_URL = "http://localhost:11434/api/generate" # Default Ollama API endpoint
chapter_order = []
# Updated to match your actual file structure
chapter_lookup = [
# MAIN chapters
("contents/core/introduction/introduction.qmd", "Introduction", 1),
("contents/core/ml_systems/ml_systems.qmd", "ML Systems", 2),
("contents/core/dl_primer/dl_primer.qmd", "DL Primer", 3),
("contents/core/dnn_architectures/dnn_architectures.qmd", "DNN Architectures", 4),
("contents/core/workflow/workflow.qmd", "AI Workflow", 5),
("contents/core/data_engineering/data_engineering.qmd", "Data Engineering", 6),
("contents/core/frameworks/frameworks.qmd", "AI Frameworks", 7),
("contents/core/training/training.qmd", "AI Training", 8),
("contents/core/efficient_ai/efficient_ai.qmd", "Efficient AI", 9),
("contents/core/optimizations/optimizations.qmd", "Model Optimizations", 10),
("contents/core/hw_acceleration/hw_acceleration.qmd", "AI Acceleration", 11),
("contents/core/benchmarking/benchmarking.qmd", "Benchmarking AI", 12),
("contents/core/ops/ops.qmd", "ML Operations", 13),
("contents/core/ondevice_learning/ondevice_learning.qmd", "On-Device Learning", 14),
("contents/core/privacy_security/privacy_security.qmd", "Security & Privacy", 15),
("contents/core/responsible_ai/responsible_ai.qmd", "Responsible AI", 16),
("contents/core/sustainable_ai/sustainable_ai.qmd", "Sustainable AI", 17),
("contents/core/robust_ai/robust_ai.qmd", "Robust AI", 18),
("contents/core/ai_for_good/ai_for_good.qmd", "AI for Good", 19),
("contents/core/conclusion/conclusion.qmd", "Conclusion", 20),
# LAB sections
("contents/labs/overview.qmd", "Labs Overview", 100),
("contents/labs/getting_started.qmd", "Lab Setup", 101),
# Arduino Nicla Vision Labs
("contents/labs/arduino/nicla_vision/setup/setup.qmd", "Arduino Setup", 102),
("contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd", "Arduino Image Classification", 103),
("contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd", "Arduino Object Detection", 104),
("contents/labs/arduino/nicla_vision/kws/kws.qmd", "Arduino Keyword Spotting", 105),
("contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd", "Arduino Motion Classification", 106),
# Seeed XIAO ESP32S3 Labs
("contents/labs/seeed/xiao_esp32s3/setup/setup.qmd", "XIAO Setup", 107),
("contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd", "XIAO Image Classification", 108),
("contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd", "XIAO Object Detection", 109),
("contents/labs/seeed/xiao_esp32s3/kws/kws.qmd", "XIAO Keyword Spotting", 110),
("contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd", "XIAO Motion Classification", 111),
# Raspberry Pi Labs
("contents/labs/raspi/setup/setup.qmd", "Raspberry Pi Setup", 112),
("contents/labs/raspi/image_classification/image_classification.qmd", "Pi Image Classification", 113),
("contents/labs/raspi/object_detection/object_detection.qmd", "Pi Object Detection", 114),
("contents/labs/raspi/llm/llm.qmd", "Pi Large Language Models", 115),
("contents/labs/raspi/vlm/vlm.qmd", "Pi Vision Language Models", 116),
# Frontmatter
("contents/frontmatter/foreword.qmd", "Foreword", 200),
("contents/frontmatter/about/about.qmd", "About", 201),
("contents/frontmatter/changelog/changelog.qmd", "Changelog", 202),
("contents/frontmatter/acknowledgements/acknowledgements.qmd", "Acknowledgements", 203),
("contents/frontmatter/socratiq/socratiq.qmd", "SocratiQ", 204),
# Appendix
("contents/appendix/phd_survival_guide.qmd", "PhD Survival Guide", 300),
]
def load_chapter_order(quarto_file=None):
global chapter_order
config_file = quarto_file or QUARTO_YML_FILE
with open(config_file, "r", encoding="utf-8") as f:
data = yaml.safe_load(f)
def find_chapters(obj):
if isinstance(obj, dict):
for key, value in obj.items():
if key == "chapters":
return value
result = find_chapters(value)
if result:
return result
elif isinstance(obj, list):
for item in obj:
result = find_chapters(item)
if result:
return result
return None
def extract_qmd_paths(items):
paths = []
for item in items:
if isinstance(item, str) and item.endswith(".qmd"):
paths.append(item)
elif isinstance(item, dict):
if "chapters" in item:
paths.extend(extract_qmd_paths(item["chapters"]))
elif "part" in item and isinstance(item["part"], str):
if item["part"].endswith(".qmd"):
paths.append(item["part"])
if "chapters" in item:
paths.extend(extract_qmd_paths(item["chapters"]))
return paths
chapters_section = find_chapters(data)
chapter_order = extract_qmd_paths(chapters_section) if chapters_section else []
print(f"📚 Loaded {len(chapter_order)} chapters from {config_file}")
def run_git_command(cmd, verbose=False, retries=3):
for attempt in range(retries):
if verbose:
print(f"📦 Running: {' '.join(cmd)} (attempt {attempt + 1})")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
return result.stdout.strip()
if attempt < retries - 1:
print(f"⚠️ Git command failed, retrying in 2s: {result.stderr}")
time.sleep(2)
else:
raise RuntimeError(f"Git command failed after {retries} attempts: {' '.join(cmd)}\n{result.stderr}")
def extract_chapter_title(file_path):
# Try exact path match first
for fname, title, number in chapter_lookup:
if fname == file_path:
if number <= 20:
return f"Chapter {number}: {title}"
elif number <= 199:
return f"Lab: {title}"
elif number <= 299:
return title # Frontmatter - just use title
else:
return title # Appendix - just use title
# Fallback: try basename matching for backwards compatibility
base = os.path.basename(file_path)
for fname, title, number in chapter_lookup:
if os.path.basename(fname) == base:
if number <= 20:
return f"Chapter {number}: {title}"
elif number <= 199:
return f"Lab: {title}"
elif number <= 299:
return title
else:
return title
# Final fallback: generate from path
if "contents/core/" in file_path:
return f"Chapter: {base.replace('_', ' ').replace('.qmd', '').title()}"
elif "contents/labs/" in file_path:
return f"Lab: {base.replace('_', ' ').replace('.qmd', '').title()}"
elif "contents/frontmatter/" in file_path:
return base.replace('_', ' ').replace('.qmd', '').title()
elif "contents/appendix/" in file_path:
return base.replace('_', ' ').replace('.qmd', '').title()
else:
return base.replace('_', ' ').replace('.qmd', '').title()
def sort_by_impact_level(updates):
def extract_impact_level(update):
# Extract impact bars from the start of each update
import re
match = re.search(r'`([█░]+)`', update)
if match:
bars = match.group(1)
# Count filled bars (█) - higher count = higher importance
filled_count = bars.count('')
return -filled_count # Negative for descending order (most important first)
return 0 # Default for entries without impact bars
return sorted(updates, key=extract_impact_level)
def get_changes_in_dev_since(date_start, date_end=None, verbose=False):
cmd = ["git", "log", "--numstat", "--since", date_start]
if date_end:
cmd += ["--until", date_end]
cmd += ["origin/dev", "--", "quarto/contents/**/*.qmd"]
return run_git_command(cmd, verbose=verbose)
def get_commit_messages_for_file(file_path, since, until=None, verbose=False):
cmd = ["git", "log", "--pretty=format:%s", "--since", since]
if until:
cmd += ["--until", until]
cmd += ["origin/dev", "--", file_path]
messages = run_git_command(cmd, verbose=verbose)
# Return all commit messages - let AI determine importance
meaningful_messages = []
for message in messages.splitlines():
if message.strip():
meaningful_messages.append(message.strip())
return "\n".join(meaningful_messages)
def call_ollama(prompt, model="llama3.1:8b", verbose=False):
"""Call Ollama API for text generation."""
try:
payload = {
"model": model,
"prompt": prompt,
"stream": False,
"options": {
"temperature": 0.2,
"num_predict": 100
}
}
if verbose:
print(f"🤖 Calling Ollama with model: {model}")
response = requests.post(OLLAMA_URL, json=payload, timeout=30)
response.raise_for_status()
result = response.json()
return result.get("response", "").strip()
except requests.exceptions.RequestException as e:
print(f"⚠️ Ollama API error: {e}")
return None
except Exception as e:
print(f"⚠️ Ollama error: {e}")
return None
def summarize_changes_with_openai(file_path, commit_messages, verbose=False, max_retries=3, use_ollama=False, ollama_model="llama3.1:8b"):
chapter_title = extract_chapter_title(file_path)
if verbose:
print(f"🤖 Calling {'Ollama' if use_ollama else 'OpenAI'} for: {file_path} -- {chapter_title}")
prompt = f"""You're writing a changelog entry for a machine learning textbook. Focus ONLY on changes that benefit readers, students, and instructors.
File: {file_path}
Chapter: {chapter_title}
Commit messages:
{commit_messages}
IGNORE these internal/infrastructure changes (don't mention them):
- Section IDs, headers, navigation improvements
- CLI help, script naming, formatting updates
- File reorganization, standardization, cross-references
- Table formatting, image filename changes
- Markdown rendering, build system improvements
- Script standardization, expert feedback incorporation
FOCUS ON these user-facing improvements:
- New content: concepts, examples, explanations, figures, diagrams
- Enhanced learning: quizzes, exercises, decision frameworks
- Improved clarity: better explanations, clearer writing, simplified concepts
- New features: tools, interactive elements, practical guides
- Content fixes: corrected equations, updated information, bug fixes
- Educational improvements: better flow, pedagogical enhancements
Only create an entry if there are meaningful user-facing changes. If changes are purely internal (section IDs, formatting, standardization, etc.), respond with "SKIP: Internal changes only".
The output format will be: `[IMPACT]` **{chapter_title}**: [YOUR SUMMARY]
Rate importance based on educational impact:
- █████ Major: New chapters, major concepts, significant educational content (rare)
- ████░ Large: Multiple new examples, substantial learning improvements (uncommon)
- ███░░ Medium: New examples, clarifications, moderate content additions (common)
- ██░░░ Small: Minor content fixes, single example additions (most common)
- █░░░░ Tiny: Small corrections, minor improvements (rare)
Format your response exactly like this:
CHANGES: [list 2-3 main USER-FACING changes from commits]
SUMMARY: [what changed that users care about - NO chapter name, just the changes]
IMPACT: [█████, ████░, ███░░, ██░░░, or █░░░░]
Example:
CHANGES: Added transformer architecture section, New attention mechanism diagrams, Fixed backprop equations
SUMMARY: Added transformer architecture section with attention mechanism diagrams and corrected backpropagation equations
IMPACT: ████░
GOOD examples:
- "Added lottery ticket hypothesis section with pruning examples"
- "New GPU memory optimization diagrams and CUDA code samples"
- "Expanded federated learning coverage with privacy-preserving techniques"
- "Fixed mathematical notation in backpropagation equations"
- "Added decision framework quiz for architecture selection"
Focus on WHAT was added/changed that improves learning, not internal infrastructure changes."""
for attempt in range(max_retries):
try:
# Add delay only for OpenAI (rate limiting)
if not use_ollama and attempt > 0:
time.sleep(OPENAI_DELAY * (2 ** attempt)) # exponential backoff
if use_ollama:
summary = call_ollama(prompt, model=ollama_model, verbose=verbose)
if summary is None:
raise Exception("Ollama call failed")
else:
response = client.chat.completions.create(
model="gpt-4",
temperature=0.2, # Lower temperature for more consistent output
max_tokens=100, # Limit length for conciseness
messages=[
{"role": "system", "content": "You are a technical writer creating concise changelog entries. Be specific and avoid generic language."},
{"role": "user", "content": prompt}
]
)
summary = response.choices[0].message.content.strip()
if not summary:
return f"- **{chapter_title}**: _(no meaningful changes detected)_"
# Check if AI decided to skip this entry due to internal-only changes
if "SKIP:" in summary or "Internal changes only" in summary:
if verbose:
print(f" ⏭️ Skipping {file_path} - internal changes only")
return None
# Parse the new structured format
import re
summary_match = re.search(r'SUMMARY:\s*(.+?)(?:\nIMPACT:|$)', summary, re.DOTALL)
impact_match = re.search(r'IMPACT:\s*([█░]+)', summary)
if summary_match:
parsed_summary = summary_match.group(1).strip()
# Remove any trailing punctuation
if parsed_summary.endswith("."):
parsed_summary = parsed_summary[:-1]
impact_bars = impact_match.group(1) if impact_match else "███░░" # default medium
# Add delay only for OpenAI after successful call
if not use_ollama:
time.sleep(OPENAI_DELAY)
return f"- `{impact_bars}` **{chapter_title}**: {parsed_summary}"
else:
# Fallback to old format if parsing fails
summary = summary.replace("--- --- --- ---", "").strip()
if summary.endswith("."):
summary = summary[:-1]
if not use_ollama:
time.sleep(OPENAI_DELAY)
return f"- **{chapter_title}**: {summary}"
except Exception as e:
print(f"⚠️ {'Ollama' if use_ollama else 'OpenAI'} attempt {attempt + 1} failed for {file_path}: {e}")
if attempt == max_retries - 1:
return f"- **{chapter_title}**: _(unable to summarize; see commits manually)_"
def format_friendly_date(date_str):
try:
# Try ISO format first (with T separator)
if 'T' in date_str:
dt = datetime.fromisoformat(date_str)
else:
# Fallback to space-separated format
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z")
# Format as "August 01 at 04:54 PM" (full month name, include time)
return dt.strftime("%B %d at %I:%M %p")
except:
return date_str
def normalized_path(path):
return os.path.normpath(path).lower()
def generate_entry(start_date, end_date=None, verbose=False, is_latest=False):
if verbose:
print(f"📁 Processing changes from {start_date} to {end_date or 'now'}")
print(f"🔍 Analyzing Git changes...")
changes = get_changes_in_dev_since(start_date, end_date, verbose=verbose)
if not changes.strip():
print(" ⚠️ No changes found in specified period")
return None
print("📊 Categorizing changes by file...")
changes_by_file = defaultdict(lambda: [0, 0])
for line in changes.splitlines():
parts = line.split("\t")
if len(parts) != 3:
continue
added, removed, file_path = parts
added = int(added) if added.isdigit() else 0
removed = int(removed) if removed.isdigit() else 0
changes_by_file[file_path][0] += added
changes_by_file[file_path][1] += removed
current_date = datetime.now().strftime('%B %d at %I:%M %p') if not end_date else format_friendly_date(end_date)
entry = f"### 📅 {current_date}\n\n"
frontmatter, chapters, labs, appendix = [], [], [], []
ordered_files = sorted(
changes_by_file,
key=lambda f: next(
(i for i, ch in enumerate(chapter_order) if normalized_path(f).endswith(normalized_path(ch))),
float('inf')
)
)
total_files = len(ordered_files)
print(f"📝 Processing {total_files} changed files...")
for idx, file_path in enumerate(ordered_files, 1):
added, removed = changes_by_file[file_path]
total = added + removed
if verbose:
print(f"🔍 Summarizing {file_path} ({added}+ / {removed}-) [{idx}/{total_files}]")
else:
print(f" 📄 [{idx}/{total_files}] {os.path.basename(file_path)} ({added}+ {removed}-)")
# Skip references
if "references.qmd" in file_path:
continue
commit_msgs = get_commit_messages_for_file(file_path, start_date, end_date, verbose=verbose)
# Skip if no meaningful commits
if not commit_msgs.strip():
if verbose:
print(f"⏭️ Skipping {file_path} - no meaningful changes")
continue
print(f" 🤖 Generating summary...")
summary = summarize_changes_with_openai(file_path, commit_msgs, verbose=verbose, use_ollama=use_ollama, ollama_model=args.model)
# Skip if AI determined these are internal-only changes
if summary is None:
continue
# Show the generated summary
summary_text = summary.replace(f"- **{extract_chapter_title(file_path)}**: ", "")
print(f" 📝 {summary_text}")
# Categorize by content type
if "contents/frontmatter/" in file_path:
frontmatter.append(summary)
elif "contents/labs/" in file_path:
labs.append(summary)
elif "contents/appendix/" in file_path:
appendix.append(summary)
else:
chapters.append(summary)
print(f"📋 Organizing into sections...")
print(f" 📄 Frontmatter: {len(frontmatter)} entries")
print(f" 📖 Chapters: {len(chapters)} entries")
print(f" 🧑‍💻 Labs: {len(labs)} entries")
print(f" 📚 Appendix: {len(appendix)} entries")
# Determine if sections should be open or closed
# All entries should be closed by default - let users choose what to explore
details_state = "" # Always closed for better UX
# Add sections in order: Frontmatter, Chapters, Labs, Appendix
if frontmatter:
entry += f"<details {details_state}>\n<summary>**📄 Frontmatter**</summary>\n\n" + "\n".join(sort_by_impact_level(frontmatter)) + "\n\n</details>\n\n"
if chapters:
entry += f"<details {details_state}>\n<summary>**📖 Chapters**</summary>\n\n" + "\n".join(sort_by_impact_level(chapters)) + "\n\n</details>\n\n"
if labs:
entry += f"<details {details_state}>\n<summary>**🧑‍💻 Labs**</summary>\n\n" + "\n".join(sort_by_impact_level(labs)) + "\n\n</details>\n\n"
if appendix:
entry += f"<details {details_state}>\n<summary>**📚 Appendix**</summary>\n\n" + "\n".join(sort_by_impact_level(appendix)) + "\n\n</details>\n"
# If no content sections were added, return None (empty entry)
if not frontmatter and not chapters and not labs and not appendix:
print(" ⚠️ No meaningful content changes found - skipping entry")
return None
print("✅ Entry generation complete")
return entry
def generate_demo_entry():
"""Generate a demo changelog entry with real data from the repository."""
current_date = datetime.now().strftime('%B %d at %I:%M %p')
current_year = datetime.now().year
# Get some real file paths from the repository
real_files = [
"quarto/contents/frontmatter/about/about.qmd",
"quarto/contents/frontmatter/acknowledgements/acknowledgements.qmd",
"quarto/contents/core/dl_primer/dl_primer.qmd",
"quarto/contents/core/workflow/workflow.qmd",
"quarto/contents/core/training/training.qmd",
"quarto/contents/core/introduction/introduction.qmd",
"quarto/contents/core/benchmarking/benchmarking.qmd",
"quarto/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd",
"quarto/contents/labs/raspi/setup/setup.qmd",
"quarto/contents/backmatter/resources/phd_survival_guide.qmd"
]
# Try to get some real commit data for more realistic content
try:
# Get recent commit messages for some files
recent_commits = run_git_command(["git", "log", "--oneline", "-5", "--", "quarto/contents/core/dl_primer/dl_primer.qmd"], verbose=False)
if recent_commits:
# Use real commit data if available
pass
except:
pass
# Generate realistic summaries based on actual files
frontmatter_entries = [
"**About**: Updated book description and target audience information",
"**Acknowledgements**: Added new contributors and updated the contributor list"
]
chapter_entries = [
"**Chapter 3: DL Primer**: Added new diagrams explaining neural network architectures and improved explanations of backpropagation",
"**Chapter 5: AI Workflow**: Enhanced the workflow diagram and added new examples for data preprocessing steps",
"**Chapter 8: AI Training**: Updated training examples with new code snippets and improved explanations of gradient descent",
"**Chapter 1: Introduction**: Fixed several typos and improved the introduction to machine learning concepts",
"**Chapter 12: Benchmarking AI**: Added new benchmarking metrics and updated performance comparison tables"
]
lab_entries = [
"**Lab: Arduino Image Classification**: Updated the image classification code with improved accuracy and added new examples",
"**Lab: Raspberry Pi Setup**: Fixed setup instructions and added troubleshooting section for common issues"
]
appendix_entries = [
"**PhD Survival Guide**: Added new resources for graduate students and updated links"
]
# Add impact bars
frontmatter_with_impact = [f"- `███░░` {entry}" for entry in frontmatter_entries[:1]] + [f"- `██░░░` {entry}" for entry in frontmatter_entries[1:]]
chapters_with_impact = [f"- `████░` {entry}" for entry in chapter_entries[:1]] + [f"- `███░░` {entry}" for entry in chapter_entries[1:3]] + [f"- `██░░░` {entry}" for entry in chapter_entries[3:]]
labs_with_impact = [f"- `███░░` {entry}" for entry in lab_entries[:1]] + [f"- `██░░░` {entry}" for entry in lab_entries[1:]]
appendix_with_impact = [f"- `█░░░░` {entry}" for entry in appendix_entries]
demo_entry = f"""## {current_year} Updates
### 📅 {current_date}
<details>
<summary>**📄 Frontmatter**</summary>
{chr(10).join(frontmatter_with_impact)}
</details>
<details>
<summary>**📖 Chapters**</summary>
{chr(10).join(chapters_with_impact)}
</details>
<details>
<summary>**🧑‍💻 Labs**</summary>
{chr(10).join(labs_with_impact)}
</details>
<details>
<summary>**📚 Appendix**</summary>
{chr(10).join(appendix_with_impact)}
</details>
"""
return demo_entry
def generate_release_notes_for_version(version, previous_version, description, verbose=False):
"""Generate release notes using your existing AI analysis"""
print(f"📝 Generating release notes for {version}...")
print(f"📋 Description: {description}")
print(f"🔄 Previous version: {previous_version}")
# Get the latest gh-pages commit date as the "since" date
latest_commit, latest_date = get_latest_gh_pages_commit()
if not latest_date:
print("❌ No previous release found!")
return None
print(f"📅 Analyzing changes since: {format_friendly_date(latest_date)}")
# Use your existing AI-powered analysis
entry = generate_entry(latest_date, verbose=verbose, is_latest=True)
if not entry:
print("⚠️ No meaningful changes found")
return None
# Format as release notes instead of changelog
release_notes = f"""## 📚 Release {version}
**{description}**
### 📋 Release Information
- **Type**: Release
- **Previous Version**: {previous_version}
- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **Changes**: Since {format_friendly_date(latest_date)}
### 📝 What's New
{entry}
### 🔗 Quick Links
- 🌐 [Read Online](https://mlsysbook.ai)
- 📄 [Download PDF](https://mlsysbook.ai/pdf)
- 🧪 [Labs & Exercises](https://mlsysbook.ai/labs)
- 📚 [GitHub Repository](https://github.com/harvard-edge/cs249r_book)
### 📊 Technical Details
- **Build System**: Quarto with custom extensions
- **Deployment**: GitHub Pages + Netlify
- **PDF Generation**: LaTeX with compression
- **Content**: Markdown with interactive elements
---
*Generated with AI analysis of changes since last release*
"""
print("✅ Release notes generated successfully")
return release_notes
def fold_existing_entries(content):
"""Fold all existing details sections in the changelog content."""
import re
# Pattern to match <details open> and replace with <details>
pattern = r'<details open>'
replacement = '<details>'
return re.sub(pattern, replacement, content)
def generate_changelog(mode="incremental", verbose=False):
print("🔄 Starting Git data fetch...")
print(" 📦 Fetching gh-pages branch...")
run_git_command(["git", "fetch", "origin", "gh-pages:refs/remotes/origin/gh-pages"], verbose=verbose)
print(" 📦 Fetching dev branch...")
run_git_command(["git", "fetch", "origin", "dev:refs/remotes/origin/dev"], verbose=verbose)
print("✅ Git data fetch complete")
def get_latest_gh_pages_commit():
print("🔍 Looking for latest publication commit...")
# Look for actual publication commits, not administrative ones
output = run_git_command(["git", "log", "--pretty=format:%H %aI", "--grep=Built site for gh-pages", "origin/gh-pages"], verbose=verbose)
if output.strip():
first_line = output.split('\n')[0]
parts = first_line.split(" ", 1)
result = (parts[0], parts[1]) if len(parts) == 2 else (None, None)
if result[0]:
print(f" 📅 Found latest commit: {result[0][:8]} from {result[1]}")
return result
print(" ⚠️ No publication commits found")
return (None, None)
def get_all_gh_pages_commits():
print("🔍 Scanning all publication commits...")
# Look for actual publication commits, not administrative ones
output = run_git_command(["git", "log", "--pretty=format:%H %aI", "--grep=Built site for gh-pages", "origin/gh-pages"], verbose=verbose)
commits = []
for line in output.splitlines():
parts = line.split(" ", 1)
if len(parts) == 2:
commits.append((parts[0], parts[1]))
print(f" 📊 Found {len(commits)} publication commits")
return commits
def extract_year_from_date(date_str):
try:
return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z").year
except:
return datetime.now().year
latest_commit, latest_date = get_latest_gh_pages_commit()
if mode == "full":
if verbose:
print("🔁 Running full regeneration...")
commits = get_all_gh_pages_commits()
# Group commits by date (YYYY-MM-DD) to merge same-day publishes
def extract_date_only(date_str):
try:
return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z").strftime("%Y-%m-%d")
except:
return date_str.split()[0] # fallback to first part
# Group commits by publication date
commits_by_date = defaultdict(list)
for commit, date in commits:
date_key = extract_date_only(date)
commits_by_date[date_key].append((commit, date))
# Sort dates and get unique publication periods
unique_dates = sorted(commits_by_date.keys(), reverse=True) # newest first
print(f"📊 Found {len(unique_dates)} unique publication dates...")
# Group entries by year
entries_by_year = defaultdict(list)
for i in range(len(unique_dates) - 1):
current_date_key = unique_dates[i]
previous_date_key = unique_dates[i + 1]
# Get the latest commit from current date for the "published on" date
current_commits = commits_by_date[current_date_key]
latest_current = max(current_commits, key=lambda x: x[1]) # latest timestamp
# Get the earliest commit from previous date as the "since" date
previous_commits = commits_by_date[previous_date_key]
earliest_previous = min(previous_commits, key=lambda x: x[1]) # earliest timestamp
current_date = latest_current[1]
previous_date = earliest_previous[1]
# Extract year from current_date (the publication date)
pub_year = extract_year_from_date(current_date)
print(f"📅 Processing period {i+1}/{len(unique_dates)-1}: {format_friendly_date(previous_date)}{format_friendly_date(current_date)} [{pub_year}]")
entry = generate_entry(previous_date, current_date, verbose=verbose, is_latest=(i==0))
if entry:
entries_by_year[pub_year].append(entry)
if not entries_by_year:
return "_No updates found._"
# Build output with year headers, newest years first
output_sections = []
for year in sorted(entries_by_year.keys(), reverse=True):
year_header = f"## {year} Updates"
year_entries = "\n\n".join(entries_by_year[year])
output_sections.append(f"{year_header}\n\n{year_entries}")
return "\n\n---\n\n".join(output_sections) + "\n"
else:
if verbose:
print("⚡ Running update mode...")
print(f"📅 Processing changes since: {format_friendly_date(latest_date) if latest_date else 'beginning'}")
entry = generate_entry(latest_date, verbose=verbose, is_latest=True)
if not entry:
return "_No updates found._"
current_year = datetime.now().year
year_header = f"## {current_year} Updates"
return f"{year_header}\n\n{entry}"
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate changelog for ML systems book.")
parser.add_argument("-f", "--full", action="store_true", help="Regenerate the entire changelog from scratch.")
parser.add_argument("-u", "--update", action="store_true", help="Add new entries since last gh-pages publish.")
parser.add_argument("-t", "--test", action="store_true", help="Run without writing to file.")
parser.add_argument("--demo", action="store_true", help="Generate a demo changelog entry with sample data.")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
parser.add_argument("-q", "--quarto-config", type=str, help="Path to quarto config file (default: quarto/config/_quarto-pdf.yml)")
parser.add_argument("-m", "--model", type=str, default="gemma2:9b", help="Ollama model to use (default: gemma2:9b). Popular options: gemma2:9b, gemma2:27b, llama3.1:8b, llama3.1:70b")
parser.add_argument("--release-notes", action="store_true", help="Generate release notes instead of changelog entry.")
parser.add_argument("--version", type=str, help="Version for release notes (required with --release-notes).")
parser.add_argument("--previous-version", type=str, help="Previous version for release notes (required with --release-notes).")
parser.add_argument("--description", type=str, help="Release description (required with --release-notes).")
args = parser.parse_args()
# Handle demo mode first
if args.demo:
print("🎭 DEMO MODE - Generating sample changelog entry")
demo_entry = generate_demo_entry()
print("=" * 60)
print("📝 DEMO CHANGELOG ENTRY")
print("=" * 60)
print(demo_entry)
print("=" * 60)
print("✅ Demo entry generated successfully!")
exit(0)
# Handle release notes mode
if args.release_notes:
if not args.version or not args.previous_version or not args.description:
print("❌ Error: --release-notes requires --version, --previous-version, and --description")
print("💡 Example: --release-notes --version v1.2.0 --previous-version v1.1.0 --description 'Add new chapter'")
exit(1)
print("📝 RELEASE NOTES MODE")
mode = "release_notes"
else:
# Require either --full or --update to be specified
if args.full and args.update:
print("❌ Error: Cannot specify both --full and --update modes")
exit(1)
elif args.full:
mode = "full"
elif args.update:
mode = "update"
else:
print("❌ Error: Must specify either --full, --update, or --release-notes mode")
print("💡 Use --help for usage information")
print("💡 Use --demo to see a sample changelog entry")
exit(1)
try:
load_chapter_order(args.quarto_config)
# Print configuration header
print("=" * 60)
print("📝 CHANGELOG GENERATION CONFIG")
print("=" * 60)
print(f"🎯 Mode: {mode.upper()}")
print(f"🤖 AI Model: {args.model} (via Ollama)")
print(f"🔧 Test Mode: {'ON' if args.test else 'OFF'}")
print(f"📢 Verbose: {'ON' if args.verbose else 'OFF'}")
print(f"📋 Features: Impact bars, importance sorting, specific summaries")
print("=" * 60)
print()
print(f"🚀 Starting changelog generation in {mode} mode...")
print(f"🤖 Using Ollama for summarization with model: {args.model}")
use_ollama = True
# Test Ollama connection
test_response = call_ollama("Hello", model=args.model, verbose=False)
if test_response is None:
print("❌ Failed to connect to Ollama. Make sure it's running on localhost:11434")
print("💡 To install models in Ollama:")
print(" ollama pull gemma2:9b")
print(" ollama pull gemma2:27b")
exit(1)
print("✅ Ollama connection successful")
if mode == "release_notes":
# Generate release notes
new_entry = generate_release_notes_for_version(
args.version,
args.previous_version,
args.description,
verbose=args.verbose
)
else:
# Generate changelog entry
new_entry = generate_changelog(mode=mode, verbose=args.verbose)
if args.test:
print("🧪 TEST OUTPUT ONLY:\n")
print(new_entry)
else:
existing = ""
if os.path.exists(CHANGELOG_FILE):
with open(CHANGELOG_FILE, "r", encoding="utf-8") as f:
existing = f.read()
current_year = datetime.now().year
year_header = f"## {current_year} Updates"
# For update mode, insert new entry after the year header
if mode == "full":
# For full mode, replace entire content (already includes year headers)
updated_content = new_entry.strip()
else:
# For incremental, insert new entry after year header
existing_lines = existing.splitlines()
new_lines = []
inserted = False
for line in existing_lines:
new_lines.append(line)
# Insert new entry right after the year header
if not inserted and line.strip() == year_header:
# Add the new entry (without year header since it's already in the file)
new_entry_lines = new_entry.strip().splitlines()
# Skip the first line (year header) since we're inserting after existing year header
if new_entry_lines and new_entry_lines[0].strip() == year_header:
new_entry_lines = new_entry_lines[1:]
new_lines.extend(new_entry_lines)
new_lines.append("") # Add blank line
inserted = True
if not inserted:
# If no year header found, prepend to beginning
new_lines = new_entry.strip().splitlines() + [""] + existing_lines
updated_content = "\n".join(new_lines)
if mode == "release_notes":
# Save release notes to a file for the workflow to use
release_notes_file = f"release_notes_{args.version}.md"
with open(release_notes_file, "w", encoding="utf-8") as f:
f.write(new_entry.strip() + "\n")
print(f"\n✅ Release notes written to {release_notes_file}")
print("📋 Next step: Use this file in your GitHub workflow")
else:
# Save changelog entry
with open(CHANGELOG_FILE, "w", encoding="utf-8") as f:
f.write(updated_content.strip() + "\n")
print(f"\n✅ Changelog written to {CHANGELOG_FILE}")
except KeyboardInterrupt:
print(f"\n⚠️ Process interrupted by user")
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()