mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-11 17:49:25 -05:00
chore: update section IDs and add table formatting utilities
Section ID Updates: - Updated section identifiers across multiple chapters for consistency - Modified section references in conclusion, introduction, ai_for_good, efficient_ai, hw_acceleration, benchmarking, and ml_systems chapters - Fixed broken Bitter Lesson reference in efficient_ai chapter Quiz Updates: - Updated quiz section references in emerging_topics_quizzes.json, frontiers_quizzes.json, and ml_systems_quizzes.json to match new section IDs New Utilities: - Added format_tables.py: Python utility for formatting Quarto markdown tables - Added test_format_tables.py: Test suite for table formatting utility These changes maintain cross-reference consistency after recent chapter reorganization.
This commit is contained in:
553
tools/scripts/format_tables.py
Normal file
553
tools/scripts/format_tables.py
Normal file
@@ -0,0 +1,553 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Table Formatter for MLSysBook
|
||||
|
||||
This script formats markdown grid tables to ensure:
|
||||
1. All column headers (first row) are bolded
|
||||
2. All first column entries are bolded
|
||||
3. Column widths are properly calculated based on content
|
||||
4. Alignment bars match the actual column widths
|
||||
5. Content is left-aligned within cells
|
||||
|
||||
Usage:
|
||||
python format_tables.py --check <file> # Check if tables are formatted correctly
|
||||
python format_tables.py --fix <file> # Format tables in place
|
||||
python format_tables.py --check-all # Check all .qmd files in contents/core
|
||||
python format_tables.py --fix-all # Format all .qmd files in contents/core
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional
|
||||
import unicodedata
|
||||
|
||||
|
||||
def display_width(text: str) -> int:
    """
    Calculate the display width of text, accounting for Unicode characters.

    Rules applied, in order:
    - Bold markers (``**``) are removed first and never counted.
    - Non-spacing and enclosing combining marks (Unicode categories ``Mn``
      and ``Me``, e.g. U+0301 COMBINING ACUTE ACCENT) count as 0 because
      they are drawn on top of the preceding character.
    - East Asian Wide and Fullwidth characters count as 2.
    - Every other character counts as 1.

    Args:
        text: Cell text, possibly containing ``**`` bold markers.

    Returns:
        The number of terminal columns the text is expected to occupy.
    """
    # Remove bold markers for width calculation
    text = text.replace('**', '')

    width = 0
    for char in text:
        # Combining marks occupy no column of their own; counting them would
        # make columns holding decomposed accents wider than they render.
        if unicodedata.category(char) in ('Mn', 'Me'):
            continue
        if unicodedata.east_asian_width(char) in ('F', 'W'):
            width += 2
        else:
            width += 1
    return width
|
||||
|
||||
|
||||
def parse_table(lines: List[str]) -> Optional[dict]:
    """
    Parse a markdown grid table into structured data.

    The expected layout is: a top border (``+...``), one header row
    (``|...``), a header separator (``+...``), then any mix of data rows
    (``|...``) and row separators (``+...``), ending with a footer border.
    A caption line (text starting with ``:``) may follow the table, either
    directly or after one or more blank lines.

    Args:
        lines: Candidate table lines; ``lines[0]`` must be the top border.

    Returns:
        A dict with the keys ``start_line``, ``end_line``, ``header_border``,
        ``header``, ``separator``, ``rows``, ``footer_border`` and
        ``caption``, or ``None`` when the first three lines do not form a
        border / header / separator triple. ``rows`` holds data rows and the
        separators between them, in source order. ``end_line`` is the index
        (into ``lines``) one past the last line consumed.
    """
    # A table needs at least: top border, header row, header separator.
    if len(lines) < 3:
        return None
    if not lines[0].startswith('+'):
        return None
    if not lines[1].startswith('|') or not lines[2].startswith('+'):
        return None

    table_data = {
        'start_line': 0,
        'end_line': 0,
        'header_border': lines[0],
        'header': lines[1],
        'separator': lines[2],
        'rows': [],
        'footer_border': '',
        'caption': ''
    }

    i = 3

    # Parse data rows
    while i < len(lines):
        line = lines[i]
        if line.startswith('|'):
            table_data['rows'].append(line)
            i += 1
        elif line.startswith('+'):
            i += 1
            if i < len(lines) and lines[i].startswith('|'):
                # A border followed by another row is a row separator.
                table_data['rows'].append(line)
            else:
                # A border not followed by a row closes the table.
                table_data['footer_border'] = line
                break
        else:
            break

    # Caption directly after the table (original behaviour, kept as-is).
    if i < len(lines) and lines[i].strip().startswith(':'):
        table_data['caption'] = lines[i]
        i += 1
    else:
        # Caption separated from the table by blank lines. Blank lines are
        # only consumed when a caption really follows them. Lines beginning
        # with "::" are skipped here so a fenced-div marker (":::") after
        # the blank line is not mistaken for a caption.
        k = i
        while k < len(lines) and not lines[k].strip():
            k += 1
        if k > i and k < len(lines):
            candidate = lines[k].strip()
            if candidate.startswith(':') and not candidate.startswith('::'):
                table_data['caption'] = lines[k]
                i = k + 1

    table_data['end_line'] = i

    return table_data
|
||||
|
||||
|
||||
def parse_row(row: str) -> List[str]:
    """
    Parse a table row into individual cell contents.

    The outer pipes are removed, the remainder is split on ``|`` and each
    cell is stripped of surrounding whitespace. A pipe preceded by a
    backslash (``\\|``) is treated as literal cell text, not as a column
    boundary, so escaped pipes no longer create phantom columns.

    Args:
        row: One ``|``-delimited table line.

    Returns:
        The stripped cell strings, left to right.
    """
    # Remove leading and trailing pipes
    row = row.strip()
    if row.startswith('|'):
        row = row[1:]
    # Only drop the trailing pipe when it is a real border, not an escape.
    if row.endswith('|') and not row.endswith('\\|'):
        row = row[:-1]

    # Split on unescaped pipes only and strip whitespace
    return [cell.strip() for cell in re.split(r'(?<!\\)\|', row)]
|
||||
|
||||
|
||||
def bold_text(text: str) -> str:
    """
    Wrap text in ``**`` bold markers.

    Surrounding whitespace is removed first. Blank input yields an empty
    string, and text that already starts and ends with ``**`` is returned
    (stripped) without a second pair of markers.
    """
    stripped = text.strip()
    if stripped == '':
        # Nothing to emphasise
        return ''
    already_bold = stripped.startswith('**') and stripped.endswith('**')
    return stripped if already_bold else '**' + stripped + '**'
|
||||
|
||||
|
||||
def is_bolded(text: str) -> bool:
    """Return True when the stripped text both starts and ends with ``**``."""
    candidate = text.strip()
    if not candidate.startswith('**'):
        return False
    return candidate.endswith('**')
|
||||
|
||||
|
||||
def calculate_column_widths(header: str, rows: List[str]) -> List[int]:
    """
    Work out how wide each column must be to hold its widest cell.

    The header determines the number of columns. Entries of ``rows`` that do
    not start with ``|`` (border lines) are ignored, and cells beyond the
    header's column count do not contribute.

    Returns one display width per header column.
    """
    parsed = [parse_row(header)]
    parsed.extend(parse_row(line) for line in rows if line.startswith('|'))

    column_count = len(parsed[0])
    widths = [0] * column_count

    for cells in parsed:
        for position, cell in enumerate(cells[:column_count]):
            widths[position] = max(widths[position], display_width(cell))

    return widths
|
||||
|
||||
|
||||
def extract_alignment(separator: str) -> List[str]:
    """
    Read per-column alignment from a header separator line.

    The line is split on ``+`` and the first and last pieces are discarded.
    For each remaining piece (stripped): colons on both ends mean 'center',
    a trailing colon only means 'right', anything else means 'left'.
    """
    alignments = []
    for segment in separator.split('+')[1:-1]:
        segment = segment.strip()
        leading_colon = segment.startswith(':')
        trailing_colon = segment.endswith(':')
        if leading_colon and trailing_colon:
            alignments.append('center')
        elif trailing_colon:
            alignments.append('right')
        else:
            # Leading colon only, or no colon at all: default to left
            alignments.append('left')
    return alignments
|
||||
|
||||
|
||||
def build_border(widths: List[int]) -> str:
    """Return a border such as ``+----+----+`` with one dash run per column."""
    dashes = []
    for column_width in widths:
        # Two extra dashes cover the single space on each side of the content
        dashes.append('-' * (column_width + 2))
    return f"+{'+'.join(dashes)}+"
|
||||
|
||||
|
||||
def build_separator(widths: List[int], alignments: List[str]) -> str:
    """
    Return a header separator such as ``+:===+:===:+====:+``.

    Each column contributes ``width`` equals signs, plus a colon on the left
    ('left'), on the right ('right'), on both sides ('center') or on neither
    (any other alignment value). Columns are paired with ``zip``, so the
    shorter of the two lists decides how many segments are produced.
    """
    # NOTE(review): 'left'/'right' segments are width + 1 characters and the
    # fallback is width characters, whereas build_border emits width + 2 per
    # column. Only 'center' lines up with the border. Confirm this is intended.
    colon_sides = {
        'center': (':', ':'),
        'left': (':', ''),
        'right': ('', ':'),
    }
    segments = []
    for width, align in zip(widths, alignments):
        lead, trail = colon_sides.get(align, ('', ''))
        segments.append(lead + '=' * width + trail)
    return '+' + '+'.join(segments) + '+'
|
||||
|
||||
|
||||
def format_cell(content: str, width: int, alignment: str = 'left') -> str:
    """
    Pad cell content with spaces so it occupies ``width`` display columns.

    The content is stripped first. 'right' puts all padding before the text,
    'center' splits it with the extra space (if any) going after the text,
    and every other value puts all padding after the text. Content wider
    than ``width`` is returned unpadded.
    """
    text = content.strip()
    slack = width - display_width(text)

    if alignment == 'right':
        return ' ' * slack + text
    if alignment == 'center':
        before = slack // 2
        return ' ' * before + text + ' ' * (slack - before)
    # left (and any unrecognised alignment)
    return text + ' ' * slack
|
||||
|
||||
|
||||
def format_row(cells: List[str], widths: List[int], alignments: List[str], bold_first: bool = False) -> str:
    """
    Format a row with proper cell widths and optional bolding of first column.

    The row always has exactly ``len(widths)`` cells so that it lines up
    with the borders built from the same widths. Missing cells are rendered
    as empty cells and missing alignments default to 'left'. The original
    implementation zipped the three lists and emitted a short row when
    ``cells`` or ``alignments`` was shorter than ``widths``.

    Args:
        cells: Cell texts for this row. Cells beyond ``len(widths)`` are
            ignored, as before.
        widths: Display width of each column.
        alignments: 'left', 'center' or 'right' per column.
        bold_first: When True, bold the first cell unless it already is.

    Returns:
        The row as ``| cell | cell | ... |``.
    """
    formatted_cells = []
    for idx, width in enumerate(widths):
        cell = cells[idx] if idx < len(cells) else ''
        align = alignments[idx] if idx < len(alignments) else 'left'
        # Bold first column if requested
        if idx == 0 and bold_first and not is_bolded(cell):
            cell = bold_text(cell)
        formatted_cells.append(format_cell(cell, width, align))

    return '| ' + ' | '.join(formatted_cells) + ' |'
|
||||
|
||||
|
||||
def format_table(table_data: dict) -> List[str]:
    """
    Format a complete table with proper bolding and column widths.

    Header cells and non-empty first-column cells are bolded, column widths
    are recomputed from the (bolded) content, and borders, the header
    separator and all rows are rebuilt to those widths.

    Row structure is preserved: a border is emitted only where the parsed
    table had a row separator. The previous implementation inserted a
    border after every ``|`` line, which split a row spanning several
    source lines into several separate rows.

    Args:
        table_data: Dict as returned by ``parse_table``. The keys ``header``,
            ``separator``, ``rows`` and (optionally) ``caption`` are read.

    Returns:
        The formatted table as a list of lines. When a caption is present
        it is appended after one empty line.
    """
    # Bold all header cells
    header_cells = [bold_text(cell) for cell in parse_row(table_data['header'])]
    num_cols = len(header_cells)

    # Pad alignments so a separator with too few segments cannot make the
    # rebuilt separator/rows drop trailing columns.
    alignments = extract_alignment(table_data['separator'])
    alignments = alignments + ['left'] * (num_cols - len(alignments))

    # Walk the body in source order: parsed cells for data lines, None for
    # row-separator borders.
    body = []
    for row in table_data['rows']:
        if row.startswith('|'):
            cells = parse_row(row)
            # Bold first column only if it's not empty
            if cells and cells[0].strip() and not is_bolded(cells[0]):
                cells[0] = bold_text(cells[0])
            body.append(cells)
        else:
            body.append(None)

    # Calculate column widths based on all content
    widths = [0] * num_cols
    for cells in [header_cells] + [entry for entry in body if entry is not None]:
        for col_idx, cell in enumerate(cells[:num_cols]):
            widths[col_idx] = max(widths[col_idx], display_width(cell))

    border = build_border(widths)

    # Top border, header row, header separator
    formatted = [
        border,
        format_row(header_cells, widths, alignments, bold_first=False),
        build_separator(widths, alignments),
    ]

    # Data lines and the row separators that were present in the source
    for entry in body:
        if entry is None:
            formatted.append(border)
        else:
            formatted.append(format_row(entry, widths, alignments, bold_first=False))

    # Footer border
    formatted.append(border)

    # Caption
    if table_data.get('caption'):
        formatted.append('')  # Empty line before caption
        formatted.append(table_data['caption'])

    return formatted
|
||||
|
||||
|
||||
def check_table_format(table_data: dict) -> List[str]:
    """
    Report formatting problems in a parsed table.

    Three things are checked, in this order: every header cell is bolded;
    every non-empty first-column cell is bolded; the top border equals the
    border that would be built from the content widths once the required
    bolding is applied.

    Returns a list of human-readable issue strings (empty when none found).
    """
    issues = []

    header_cells = parse_row(table_data['header'])
    for position, cell in enumerate(header_cells, start=1):
        if not is_bolded(cell):
            issues.append(f"Header column {position} is not bolded: '{cell}'")

    # One pass over the data lines: record unbolded first cells and build
    # the rows as they would look after fixing.
    normalized_rows = []
    row_num = 0
    for raw in table_data['rows']:
        if not raw.startswith('|'):
            continue  # row separator, nothing to inspect
        row_num += 1
        cells = parse_row(raw)
        if cells and cells[0].strip() and not is_bolded(cells[0]):
            issues.append(f"First column in row {row_num} is not bolded: '{cells[0]}'")
            cells[0] = bold_text(cells[0])
        normalized_rows.append(cells)

    # The alignments are extracted but no check below uses them; the call is
    # kept so this function touches the same keys as before.
    extract_alignment(table_data['separator'])

    # Expected widths, using the header's column count as the limit
    num_cols = len(header_cells)
    expected_widths = [0] * num_cols
    candidates = [[bold_text(cell) for cell in header_cells]]
    candidates.extend(normalized_rows)
    for cells in candidates:
        for col_idx, cell in enumerate(cells[:num_cols]):
            expected_widths[col_idx] = max(expected_widths[col_idx], display_width(cell))

    # Only the top border is compared against the expected border
    if table_data['header_border'].strip() != build_border(expected_widths).strip():
        issues.append("Border widths don't match content widths")

    return issues
|
||||
|
||||
|
||||
def process_file(file_path: Path, fix: bool = False) -> Tuple[int, int]:
    """
    Process a single file to check or fix table formatting.

    The file is scanned line by line. A line starting with ``+`` begins a
    candidate table: following lines starting with ``+`` or ``|`` are
    collected, plus one directly adjacent line starting with ``:`` as a
    caption. The candidate is handed to ``parse_table`` and, if it parses,
    to ``check_table_format``.

    Only the lines ``parse_table`` actually consumed (``end_line``) are
    replaced or copied; scanning resumes right after them. Previously the
    whole candidate was skipped, so in fix mode any collected line that
    ``parse_table`` did not consume was dropped from the rewritten file.

    Args:
        file_path: UTF-8 text file to process.
        fix: When True, rewrite tables that have issues and save the file
            if anything changed. When False, print the issues instead.

    Returns:
        ``(tables_checked, tables_with_issues)``.
    """
    content = file_path.read_text(encoding='utf-8')
    lines = content.split('\n')

    tables_checked = 0
    tables_with_issues = 0
    modified = False

    i = 0
    new_lines = []

    while i < len(lines):
        # Check if this might be a table start
        if lines[i].startswith('+'):
            # Collect the contiguous candidate lines. Blank lines start with
            # none of the accepted prefixes, so they end the run.
            table_lines = []
            j = i
            while j < len(lines):
                if lines[j].startswith(':') and table_lines:
                    # Adjacent caption: take it and stop
                    table_lines.append(lines[j])
                    j += 1
                    break
                if lines[j].startswith(('+', '|')):
                    table_lines.append(lines[j])
                    j += 1
                else:
                    break

            table_data = parse_table(table_lines)

            if table_data:
                tables_checked += 1
                # Number of candidate lines the parser really consumed
                consumed = table_data['end_line']
                issues = check_table_format(table_data)

                if issues:
                    tables_with_issues += 1
                    if not fix:
                        print(f" Issues found in table at line {i + 1}:")
                        for issue in issues:
                            print(f" - {issue}")
                    else:
                        # Format the table
                        new_lines.extend(format_table(table_data))
                        modified = True
                else:
                    # Table is already correct
                    new_lines.extend(table_lines[:consumed])

                # Resume after the consumed lines so any unconsumed remainder
                # is scanned (and kept) like ordinary content.
                i += consumed
                continue

        # Not a table, keep line as is
        new_lines.append(lines[i])
        i += 1

    if fix and modified:
        # Write back to file
        file_path.write_text('\n'.join(new_lines), encoding='utf-8')
        print(f" Fixed {tables_with_issues} tables")

    return tables_checked, tables_with_issues
|
||||
|
||||
|
||||
def find_qmd_files(base_path: Path) -> List[Path]:
    """
    Collect every ``.qmd`` file below ``<base_path>/quarto/contents/core``.

    The search is recursive and the result is sorted. If the directory does
    not exist, an error line is printed and an empty list is returned.
    """
    core_path = base_path.joinpath("quarto", "contents", "core")
    if core_path.exists():
        return sorted(core_path.rglob("*.qmd"))

    print(f"Error: {core_path} does not exist")
    return []
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point.

    Exactly one of ``--check FILE``, ``--fix FILE``, ``--check-all`` or
    ``--fix-all`` must be given. Relative FILE paths are resolved against
    the workspace root, taken to be three directories above this script.

    Returns:
        0 on success. 1 when the target file is missing, when no .qmd files
        are found, or when a check mode finds tables with issues.
    """
    parser = argparse.ArgumentParser(
        description="Format markdown grid tables in MLSysBook",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Check a single file
python format_tables.py --check quarto/contents/core/optimizations/optimizations.qmd

# Fix a single file
python format_tables.py --fix quarto/contents/core/optimizations/optimizations.qmd

# Check all files
python format_tables.py --check-all

# Fix all files
python format_tables.py --fix-all
"""
    )

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--check', metavar='FILE', help='Check table formatting in a file')
    group.add_argument('--fix', metavar='FILE', help='Fix table formatting in a file')
    group.add_argument('--check-all', action='store_true', help='Check all .qmd files')
    group.add_argument('--fix-all', action='store_true', help='Fix all .qmd files')

    args = parser.parse_args()

    # Determine workspace root (assume script is in tools/scripts/)
    script_path = Path(__file__).resolve()
    workspace_root = script_path.parent.parent.parent

    def shown(path):
        # relative_to() raises ValueError for paths outside the workspace
        # (e.g. an absolute path elsewhere on disk); show those unchanged
        # instead of crashing before the file is processed.
        try:
            return path.relative_to(workspace_root)
        except ValueError:
            return path

    if args.check or args.fix:
        # Process single file
        file_path = Path(args.check or args.fix)
        if not file_path.is_absolute():
            file_path = workspace_root / file_path

        if not file_path.exists():
            print(f"Error: File {file_path} does not exist")
            return 1

        fix_mode = bool(args.fix)
        print(f"{'Fixing' if fix_mode else 'Checking'} {shown(file_path)}")

        tables_checked, tables_with_issues = process_file(file_path, fix=fix_mode)

        print(f" Found {tables_checked} tables, {tables_with_issues} with issues")

        if not fix_mode and tables_with_issues > 0:
            return 1

    else:
        # Process all files
        qmd_files = find_qmd_files(workspace_root)

        if not qmd_files:
            print("No .qmd files found")
            return 1

        fix_mode = args.fix_all
        print(f"{'Fixing' if fix_mode else 'Checking'} {len(qmd_files)} files...")
        print()

        total_tables = 0
        total_issues = 0
        files_with_issues = []

        for qmd_file in qmd_files:
            tables_checked, tables_with_issues = process_file(qmd_file, fix=fix_mode)

            # Files without tables contribute nothing and are not listed
            if tables_checked == 0:
                continue

            rel_path = shown(qmd_file)
            print(f"{rel_path}: {tables_checked} tables, {tables_with_issues} with issues")

            total_tables += tables_checked
            total_issues += tables_with_issues

            if tables_with_issues > 0:
                files_with_issues.append(rel_path)

        print()
        print(f"Total: {total_tables} tables checked, {total_issues} with issues")

        if not fix_mode and total_issues > 0:
            print()
            print("Files with formatting issues:")
            for file_path in files_with_issues:
                print(f" - {file_path}")
            return 1

    return 0
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
343
tools/scripts/test_format_tables.py
Normal file
343
tools/scripts/test_format_tables.py
Normal file
@@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test cases for table formatter.
|
||||
|
||||
Tests various edge cases including:
|
||||
- Standard tables with multiple rows
|
||||
- Tables with empty cells
|
||||
- Tables with multi-row cells
|
||||
- Tables with Unicode characters
|
||||
- Tables with already bolded content
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from format_tables import (
|
||||
display_width,
|
||||
parse_table,
|
||||
parse_row,
|
||||
bold_text,
|
||||
is_bolded,
|
||||
calculate_column_widths,
|
||||
extract_alignment,
|
||||
build_border,
|
||||
build_separator,
|
||||
format_cell,
|
||||
format_row,
|
||||
format_table,
|
||||
check_table_format
|
||||
)
|
||||
|
||||
|
||||
def test_display_width():
    """Check display_width on ASCII, bolded, and arrow-symbol inputs."""
    print("Testing display_width...")

    expectations = [
        ("hello", 5),          # basic ASCII
        ("**hello**", 5),      # bold markers should not count
        ("↑↑ High", 7),        # Unicode characters
        ("**↑↑ High**", 7),    # mixed
    ]
    for sample, expected in expectations:
        assert display_width(sample) == expected

    print(" ✓ display_width tests passed")
|
||||
|
||||
|
||||
def test_bold_text():
    """Check bold_text on plain, already-bold, empty and padded inputs."""
    print("Testing bold_text...")

    expectations = [
        ("hello", "**hello**"),      # basic text
        ("**hello**", "**hello**"),  # already bolded
        ("", ""),                    # empty text
        (" ", ""),                   # whitespace only
        (" hello ", "**hello**"),    # text with spaces
    ]
    for sample, expected in expectations:
        assert bold_text(sample) == expected

    print(" ✓ bold_text tests passed")
|
||||
|
||||
|
||||
def test_is_bolded():
    """Test checking if text is bolded.

    Truthiness is asserted directly rather than comparing with ``== True``
    / ``== False`` (PEP 8 advises against comparing booleans with ``==``).
    """
    print("Testing is_bolded...")

    assert is_bolded("**hello**")
    assert not is_bolded("hello")
    assert not is_bolded("**hello")
    assert not is_bolded("hello**")
    assert not is_bolded("")

    print(" ✓ is_bolded tests passed")
|
||||
|
||||
|
||||
def test_parse_row():
    """Check parse_row on a full row and on a row with an empty cell."""
    print("Testing parse_row...")

    assert parse_row("| Header 1 | Header 2 | Header 3 |") == ["Header 1", "Header 2", "Header 3"]

    # Empty cells
    assert parse_row("| Value 1 | | Value 3 |") == ["Value 1", "", "Value 3"]

    print(" ✓ parse_row tests passed")
|
||||
|
||||
|
||||
def test_extract_alignment():
    """Check extract_alignment on left, center and mixed separators."""
    print("Testing extract_alignment...")

    cases = {
        "+:===+:===+:===+": ["left", "left", "left"],            # left aligned
        "+:===:+:===:+:===:+": ["center", "center", "center"],   # center aligned
        "+:===+:===:+===:+": ["left", "center", "right"],        # mixed
    }
    for sep, expected in cases.items():
        assert extract_alignment(sep) == expected

    print(" ✓ extract_alignment tests passed")
|
||||
|
||||
|
||||
def test_build_border():
    """Check build_border for three columns of different widths."""
    print("Testing build_border...")

    assert build_border([10, 15, 20]) == "+------------+-----------------+----------------------+"

    print(" ✓ build_border tests passed")
|
||||
|
||||
|
||||
def test_build_separator():
    """Check build_separator for one left, one center and one right column."""
    print("Testing build_separator...")

    produced = build_separator([10, 15, 20], ["left", "center", "right"])
    assert produced == "+:==========+:===============:+====================:+"

    print(" ✓ build_separator tests passed")
|
||||
|
||||
|
||||
def test_format_cell():
    """Test formatting cell content.

    Each expected string is padded to the full requested width of 10, so
    the equality assertion and the ``len(cell) == 10`` assertion agree.
    The literals as shown had their padding collapsed to a single space,
    which contradicted the length assertions.
    """
    print("Testing format_cell...")

    # Left aligned: padding goes after the text
    cell = format_cell("Hello", 10, "left")
    assert cell == "Hello     "
    assert len(cell) == 10

    # Center aligned: padding is split around the text
    cell = format_cell("Hi", 10, "center")
    assert cell == "    Hi    "
    assert len(cell) == 10

    # Right aligned: padding goes before the text
    cell = format_cell("Bye", 10, "right")
    assert cell == "       Bye"
    assert len(cell) == 10

    # With Unicode
    cell = format_cell("↑ High", 10, "left")
    assert len(cell) == 10

    print(" ✓ format_cell tests passed")
|
||||
|
||||
|
||||
def test_simple_table():
    """Parse, check and format a plain two-column table without bolding."""
    print("Testing simple table formatting...")

    border = "+----------+----------+"
    table_lines = [
        border,
        "| Header 1 | Header 2 |",
        "+:=========+:=========+",
        "| Value 1 | Value 2 |",
        border,
        "| Value 3 | Value 4 |",
        border,
        "",
        ": Test table caption {#tbl-test}",
    ]

    table_data = parse_table(table_lines)
    assert table_data is not None

    # The headers are not bolded, so at least one issue must be reported
    assert len(check_table_format(table_data)) > 0

    formatted = format_table(table_data)
    header_line, first_row, second_row = formatted[1], formatted[3], formatted[5]

    # Headers get bolded
    for label in ("**Header 1**", "**Header 2**"):
        assert label in header_line

    # First column gets bolded
    assert "**Value 1**" in first_row
    assert "**Value 3**" in second_row

    print(" ✓ Simple table formatting passed")
|
||||
|
||||
|
||||
def test_table_with_empty_cells():
    """Format a table whose second data row has an empty first cell."""
    print("Testing table with empty cells...")

    border = "+-----------+---------+"
    table_lines = [
        border,
        "| Technique | Goal |",
        "+:==========+:=======:+",
        "| Pruning | Reduce |",
        border,
        "| | Size |",
        border,
    ]

    table_data = parse_table(table_lines)
    assert table_data is not None

    formatted = format_table(table_data)

    # Header is bolded
    assert "**Technique**" in formatted[1]

    # Non-empty first-column cell is bolded
    assert "**Pruning**" in formatted[3]

    # An empty first cell must stay empty rather than become "****"
    assert "****" not in formatted[5]

    print(" ✓ Table with empty cells passed")
|
||||
|
||||
|
||||
def test_table_with_unicode():
    """Format a table containing arrow symbols and check they survive."""
    print("Testing table with Unicode characters...")

    border = "+----------+----------+"
    table_lines = [
        border,
        "| Type | Status |",
        "+:=========+:========:+",
        "| Memory | ↑↑ High |",
        border,
        "| Speed | → Neutral|",
        border,
    ]

    table_data = parse_table(table_lines)
    assert table_data is not None

    joined = " ".join(format_table(table_data))

    # Formatting must keep the Unicode cell text intact
    for fragment in ("↑↑ High", "→ Neutral"):
        assert fragment in joined

    print(" ✓ Table with Unicode passed")
|
||||
|
||||
|
||||
def test_already_formatted_table():
    """Test table that's already properly formatted.

    The headers and first-column cells are already bolded, so the checker
    must not report any bolding issue and the formatter must not add a
    second pair of bold markers.
    """
    print("Testing already formatted table...")

    table_lines = [
        "+--------------+--------------+",
        "| **Header 1** | **Header 2** |",
        "+:============:+:============:+",
        "| **Row 1** | Value |",
        "+--------------+--------------+",
        "| **Row 2** | Value |",
        "+--------------+--------------+"
    ]

    table_data = parse_table(table_lines)
    assert table_data is not None

    # No bolding issues are expected. Border-width issues may still be
    # reported, so only the bolding messages are asserted on (previously
    # the result was computed and then ignored).
    issues = check_table_format(table_data)
    assert not any("not bolded" in issue for issue in issues)

    formatted = format_table(table_data)

    # Headers should stay bolded (not double-bolded)
    assert "**Header 1**" in formatted[1]
    assert "****Header 1****" not in formatted[1]

    print(" ✓ Already formatted table passed")
|
||||
|
||||
|
||||
def run_all_tests():
    """
    Execute every test function in order and report the outcome.

    Returns 0 when all tests pass, 1 on the first assertion failure or
    unexpected exception (a traceback is printed for the latter).
    """
    rule = "=" * 60

    print(rule)
    print("Running Table Formatter Tests")
    print(rule)
    print()

    suite = (
        test_display_width,
        test_bold_text,
        test_is_bolded,
        test_parse_row,
        test_extract_alignment,
        test_build_border,
        test_build_separator,
        test_format_cell,
        test_simple_table,
        test_table_with_empty_cells,
        test_table_with_unicode,
        test_already_formatted_table,
    )

    try:
        for test_case in suite:
            test_case()
    except AssertionError as e:
        print()
        print(rule)
        print(f"Test failed: {e}")
        print(rule)
        return 1
    except Exception as e:
        print()
        print(rule)
        print(f"Error running tests: {e}")
        import traceback
        traceback.print_exc()
        print(rule)
        return 1

    print()
    print(rule)
    print("All tests passed! ✅")
    print(rule)
    return 0
|
||||
|
||||
|
||||
# Run the suite only when executed as a script; the suite's return value
# becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(run_all_tests())
|
||||
Reference in New Issue
Block a user