mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-10 15:49:25 -05:00
Fix indentation issues in improve_figure_captions.py
- Corrected Python indentation inconsistencies
- Fixed malformed code blocks from previous edits
- Maintains all functionality while cleaning up formatting
This commit is contained in:
@@ -101,7 +101,7 @@ class CaptionQualityChecker:
|
||||
pattern = r'^\*\*[^*]+\*\*:\s*.+'
|
||||
|
||||
if re.match(pattern, caption.strip()):
|
||||
return True, ""
|
||||
return True, ""
|
||||
else:
|
||||
return False, "Missing **Bold Title**: format"
|
||||
|
||||
@@ -447,7 +447,7 @@ class FigureCaptionImprover:
|
||||
word_index += 1
|
||||
|
||||
return ''.join(result_tokens)
|
||||
|
||||
|
||||
def format_bold_explanation_caption(self, caption: str) -> str:
|
||||
"""
|
||||
Format caption to ensure proper **bold**: explanation capitalization.
|
||||
@@ -889,9 +889,9 @@ class FigureCaptionImprover:
|
||||
check_line = lines[j].strip()
|
||||
if check_line.startswith('##') and not check_line.startswith('###'):
|
||||
section_title = re.sub(r'^#+\s*', '', check_line)
|
||||
section_title = re.sub(r'\s*\{#[^}]+\}.*$', '', section_title)
|
||||
break
|
||||
|
||||
section_title = re.sub(r'\s*\{#[^}]+\}.*$', '', section_title)
|
||||
break
|
||||
|
||||
# Extract context around reference (±10 lines, then expand to word boundaries)
|
||||
start_idx = max(0, i - 10)
|
||||
end_idx = min(len(lines), i + 10)
|
||||
@@ -911,11 +911,11 @@ class FigureCaptionImprover:
|
||||
start_word = max(0, fig_word_pos - 150)
|
||||
end_word = min(len(words), fig_word_pos + 150)
|
||||
context_text = ' '.join(words[start_word:end_word])
|
||||
|
||||
return {
|
||||
'title': section_title,
|
||||
'content': context_text
|
||||
}
|
||||
|
||||
return {
|
||||
'title': section_title,
|
||||
'content': context_text
|
||||
}
|
||||
|
||||
# Ultimate fallback
|
||||
return {
|
||||
@@ -1021,42 +1021,42 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
base_delay = 1 # seconds
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
# Prepare the request payload
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
try:
|
||||
# Prepare the request payload
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"temperature": 0.7, # Higher temperature for more diverse, creative captions
|
||||
"num_predict": 120, # Slightly shorter for focused responses
|
||||
"top_p": 0.9 # Add nucleus sampling for better variety
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Add image if provided (for multimodal models)
|
||||
if image_path and os.path.exists(image_path):
|
||||
encoded_image = self.encode_image(image_path)
|
||||
if encoded_image:
|
||||
payload["images"] = [encoded_image]
|
||||
|
||||
# Make request to Ollama
|
||||
response = requests.post(
|
||||
"http://localhost:11434/api/generate",
|
||||
json=payload,
|
||||
timeout=60
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
new_caption = result.get('response', '').strip()
|
||||
|
||||
# Add image if provided (for multimodal models)
|
||||
if image_path and os.path.exists(image_path):
|
||||
encoded_image = self.encode_image(image_path)
|
||||
if encoded_image:
|
||||
payload["images"] = [encoded_image]
|
||||
# Clean up any markdown code blocks
|
||||
if new_caption.startswith('```') and new_caption.endswith('```'):
|
||||
new_caption = new_caption.strip('`').strip()
|
||||
if new_caption.startswith('json\n'):
|
||||
new_caption = new_caption[5:].strip()
|
||||
|
||||
# Make request to Ollama
|
||||
response = requests.post(
|
||||
"http://localhost:11434/api/generate",
|
||||
json=payload,
|
||||
timeout=60
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
new_caption = result.get('response', '').strip()
|
||||
|
||||
# Clean up any markdown code blocks
|
||||
if new_caption.startswith('```') and new_caption.endswith('```'):
|
||||
new_caption = new_caption.strip('`').strip()
|
||||
if new_caption.startswith('json\n'):
|
||||
new_caption = new_caption[5:].strip()
|
||||
|
||||
# Sanity check: Reject overly long captions (likely hallucination)
|
||||
word_count = len(new_caption.split())
|
||||
if word_count > 100:
|
||||
@@ -1064,8 +1064,8 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
# Don't retry for long captions - this is a formatting issue, not API error
|
||||
return None
|
||||
|
||||
# Validate the format contains **bold**:
|
||||
if '**' in new_caption and ':' in new_caption:
|
||||
# Validate the format contains **bold**:
|
||||
if '**' in new_caption and ':' in new_caption:
|
||||
# Apply comprehensive quality improvements
|
||||
formatted_caption = self.format_bold_explanation_caption(new_caption)
|
||||
improved_caption = self.validate_and_improve_caption(formatted_caption, is_table)
|
||||
@@ -1077,11 +1077,11 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
return None
|
||||
|
||||
return improved_caption
|
||||
else:
|
||||
print(f" ⚠️ Generated caption doesn't follow **bold**: format: {new_caption[:100]}")
|
||||
# Don't retry for format issues - this is a generation problem, not API error
|
||||
return None
|
||||
else:
|
||||
print(f" ⚠️ Generated caption doesn't follow **bold**: format: {new_caption[:100]}")
|
||||
# Don't retry for format issues - this is a generation problem, not API error
|
||||
return None
|
||||
else:
|
||||
# API error - this is worth retrying
|
||||
if attempt < max_retries - 1:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
@@ -1090,9 +1090,9 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
continue
|
||||
else:
|
||||
print(f" ❌ Ollama API error: {response.status_code} (all {max_retries} attempts failed)")
|
||||
return None
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
return None
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
# Network/connection error - worth retrying
|
||||
if attempt < max_retries - 1:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
@@ -1101,8 +1101,8 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
continue
|
||||
else:
|
||||
print(f" ❌ Request error: {e} (all {max_retries} attempts failed)")
|
||||
return None
|
||||
except Exception as e:
|
||||
return None
|
||||
except Exception as e:
|
||||
# Unexpected error - worth retrying once but likely a code issue
|
||||
if attempt < max_retries - 1:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
@@ -1114,7 +1114,7 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
return None
|
||||
|
||||
# Should never reach here due to the loop structure, but just in case
|
||||
return None
|
||||
return None
|
||||
|
||||
def compile_tikz_to_image(self, tikz_code: str, figure_id: str) -> Optional[str]:
|
||||
"""Compile TikZ code to a PNG image for multimodal processing."""
|
||||
@@ -1327,8 +1327,8 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
# Extract the path - handle escaped characters properly
|
||||
path = self._extract_balanced_path(full_text)
|
||||
if path is not None:
|
||||
return {
|
||||
'type': 'markdown',
|
||||
return {
|
||||
'type': 'markdown',
|
||||
'caption': caption.strip(),
|
||||
'path': path.strip(),
|
||||
'full_match': full_text,
|
||||
@@ -1545,8 +1545,8 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
Dict with 'caption', 'full_match' or None if not found
|
||||
"""
|
||||
# Try old format first (with leading colon) - this must be checked first to properly strip `: ` prefix
|
||||
pattern_old = rf'^:\s*([^{{\n]+?)\s*\{{[^}}]*#{re.escape(tbl_id)}(?:\s|[^}}])*\}}\s*$'
|
||||
match = re.search(pattern_old, content, re.MULTILINE)
|
||||
pattern_old = rf'^:\s*([^{{\n]+?)\s*\{{[^}}]*#{re.escape(tbl_id)}(?:\s|[^}}])*\}}\s*$'
|
||||
match = re.search(pattern_old, content, re.MULTILINE)
|
||||
|
||||
if not match:
|
||||
# Fall back to new format (without leading colon) - allow colons in caption text
|
||||
@@ -1686,7 +1686,7 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
if new_caption.startswith(': '):
|
||||
# New caption already has prefix, use as-is
|
||||
formatted_caption = new_caption
|
||||
else:
|
||||
else:
|
||||
# Add the `: ` prefix and ensure it ends with a period
|
||||
if not new_caption.endswith('.'):
|
||||
formatted_caption = f': {new_caption}.'
|
||||
@@ -1744,8 +1744,8 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
return self.update_code_figure(content, fig_id, new_caption)
|
||||
else:
|
||||
# Fallback to markdown method
|
||||
return self.update_markdown_figure(content, fig_id, new_caption)
|
||||
|
||||
return self.update_markdown_figure(content, fig_id, new_caption)
|
||||
|
||||
def print_summary(self) -> None:
|
||||
"""Print a summary of the processing results."""
|
||||
print(f"\n{'='*60}")
|
||||
@@ -2093,8 +2093,8 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
qmd_files = [Path(f) for f in specific_files if f.endswith('.qmd')]
|
||||
print(f"📖 Processing {len(qmd_files)} specific QMD files")
|
||||
else:
|
||||
qmd_files = self.find_qmd_files_in_order(directories)
|
||||
print(f"📖 Scanning {len(qmd_files)} QMD files in book order")
|
||||
qmd_files = self.find_qmd_files_in_order(directories)
|
||||
print(f"📖 Scanning {len(qmd_files)} QMD files in book order")
|
||||
|
||||
content_map = {
|
||||
'figures': {},
|
||||
@@ -2139,81 +2139,81 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
|
||||
# Process each potential figure ID (unless tables-only mode)
|
||||
if not tables_only:
|
||||
for fig_id in potential_fig_ids:
|
||||
try:
|
||||
fig_def = self.find_figure_definition_in_qmd(content, fig_id)
|
||||
if fig_def:
|
||||
for fig_id in potential_fig_ids:
|
||||
try:
|
||||
fig_def = self.find_figure_definition_in_qmd(content, fig_id)
|
||||
if fig_def:
|
||||
# Store original caption as-is from the file
|
||||
original_caption = fig_def['caption']
|
||||
|
||||
content_map['figures'][fig_id] = {
|
||||
|
||||
content_map['figures'][fig_id] = {
|
||||
'original_caption': original_caption,
|
||||
'new_caption': '',
|
||||
'type': fig_def['type'],
|
||||
'source_file': qmd_file
|
||||
}
|
||||
'new_caption': '',
|
||||
'type': fig_def['type'],
|
||||
'source_file': qmd_file
|
||||
}
|
||||
|
||||
print(f" ✅ Found figure: {fig_id} ({fig_def['type']})")
|
||||
file_figures += 1
|
||||
stats['figures_found'] += 1
|
||||
|
||||
# Count by type
|
||||
if fig_def['type'] == 'markdown':
|
||||
stats['markdown_figures'] += 1
|
||||
elif fig_def['type'] == 'tikz':
|
||||
stats['tikz_figures'] += 1
|
||||
elif fig_def['type'] == 'code':
|
||||
stats['code_figures'] += 1
|
||||
|
||||
print(f" ✅ Found figure: {fig_id} ({fig_def['type']})")
|
||||
file_figures += 1
|
||||
stats['figures_found'] += 1
|
||||
|
||||
# Count by type
|
||||
if fig_def['type'] == 'markdown':
|
||||
stats['markdown_figures'] += 1
|
||||
elif fig_def['type'] == 'tikz':
|
||||
stats['tikz_figures'] += 1
|
||||
elif fig_def['type'] == 'code':
|
||||
stats['code_figures'] += 1
|
||||
|
||||
else:
|
||||
print(f" ⚠️ Failed to extract: {fig_id}")
|
||||
stats['extraction_failures'] += 1
|
||||
stats['failed_extractions'].append(fig_id)
|
||||
if qmd_file not in stats['files_with_issues']:
|
||||
stats['files_with_issues'].append(qmd_file)
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {fig_id}: {e}")
|
||||
else:
|
||||
print(f" ⚠️ Failed to extract: {fig_id}")
|
||||
stats['extraction_failures'] += 1
|
||||
stats['failed_extractions'].append(fig_id)
|
||||
stats['failed_extractions'].append(fig_id)
|
||||
if qmd_file not in stats['files_with_issues']:
|
||||
stats['files_with_issues'].append(qmd_file)
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {fig_id}: {e}")
|
||||
stats['extraction_failures'] += 1
|
||||
stats['failed_extractions'].append(fig_id)
|
||||
if qmd_file not in stats['files_with_issues']:
|
||||
stats['files_with_issues'].append(qmd_file)
|
||||
else:
|
||||
print(f" ⏭️ Skipping {len(potential_fig_ids)} figures (tables-only mode)")
|
||||
|
||||
# Process each potential table ID (unless figures-only mode)
|
||||
if not figures_only:
|
||||
for tbl_id in potential_tbl_ids:
|
||||
try:
|
||||
tbl_def = self.detect_table(content, tbl_id)
|
||||
if tbl_def:
|
||||
for tbl_id in potential_tbl_ids:
|
||||
try:
|
||||
tbl_def = self.detect_table(content, tbl_id)
|
||||
if tbl_def:
|
||||
# Store original caption as-is from the file
|
||||
original_caption = tbl_def['caption']
|
||||
|
||||
content_map['tables'][tbl_id] = {
|
||||
|
||||
content_map['tables'][tbl_id] = {
|
||||
'original_caption': original_caption,
|
||||
'new_caption': '',
|
||||
'type': 'table',
|
||||
'source_file': qmd_file
|
||||
}
|
||||
|
||||
print(f" ✅ Found table: {tbl_id}")
|
||||
file_tables += 1
|
||||
stats['tables_found'] += 1
|
||||
|
||||
else:
|
||||
print(f" ⚠️ Failed to extract: {tbl_id}")
|
||||
stats['extraction_failures'] += 1
|
||||
stats['failed_extractions'].append(tbl_id)
|
||||
if qmd_file not in stats['files_with_issues']:
|
||||
stats['files_with_issues'].append(qmd_file)
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {tbl_id}: {e}")
|
||||
'new_caption': '',
|
||||
'type': 'table',
|
||||
'source_file': qmd_file
|
||||
}
|
||||
|
||||
print(f" ✅ Found table: {tbl_id}")
|
||||
file_tables += 1
|
||||
stats['tables_found'] += 1
|
||||
|
||||
else:
|
||||
print(f" ⚠️ Failed to extract: {tbl_id}")
|
||||
stats['extraction_failures'] += 1
|
||||
stats['failed_extractions'].append(tbl_id)
|
||||
stats['failed_extractions'].append(tbl_id)
|
||||
if qmd_file not in stats['files_with_issues']:
|
||||
stats['files_with_issues'].append(qmd_file)
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {tbl_id}: {e}")
|
||||
stats['extraction_failures'] += 1
|
||||
stats['failed_extractions'].append(tbl_id)
|
||||
if qmd_file not in stats['files_with_issues']:
|
||||
stats['files_with_issues'].append(qmd_file)
|
||||
else:
|
||||
print(f" ⏭️ Skipping {len(potential_tbl_ids)} tables (figures-only mode)")
|
||||
|
||||
@@ -2365,9 +2365,9 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
"""
|
||||
try:
|
||||
# Read current file content
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
# Build targeted search pattern based on type
|
||||
if item_type == 'figure':
|
||||
old_pattern, new_pattern = self.build_figure_search_patterns(
|
||||
@@ -2390,12 +2390,12 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
new_content = content.replace(old_pattern, new_pattern)
|
||||
|
||||
# Write back the file
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
f.write(new_content)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error in targeted update: {e}")
|
||||
return False
|
||||
|
||||
@@ -2524,36 +2524,36 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
print(f" 📊 Processing figure: {fig_id}")
|
||||
|
||||
try:
|
||||
# Extract context around this figure
|
||||
context = self.extract_section_context(file_content, fig_id)
|
||||
|
||||
# Find image path if it's a markdown figure
|
||||
image_path = None
|
||||
if fig_data.get('type') == 'markdown':
|
||||
# Try to extract image path from the figure definition
|
||||
image_pattern = rf'!\[[^\]]*\]\(([^)]+)\)[^{{]*{{[^}}]*#{re.escape(fig_id)}'
|
||||
match = re.search(image_pattern, file_content)
|
||||
if match:
|
||||
relative_path = match.group(1)
|
||||
# Resolve relative to the source file directory
|
||||
source_dir = Path(source_file).parent
|
||||
image_path = str(source_dir / relative_path)
|
||||
if not os.path.exists(image_path):
|
||||
image_path = None
|
||||
|
||||
# Generate improved caption
|
||||
# Extract context around this figure
|
||||
context = self.extract_section_context(file_content, fig_id)
|
||||
|
||||
# Find image path if it's a markdown figure
|
||||
image_path = None
|
||||
if fig_data.get('type') == 'markdown':
|
||||
# Try to extract image path from the figure definition
|
||||
image_pattern = rf'!\[[^\]]*\]\(([^)]+)\)[^{{]*{{[^}}]*#{re.escape(fig_id)}'
|
||||
match = re.search(image_pattern, file_content)
|
||||
if match:
|
||||
relative_path = match.group(1)
|
||||
# Resolve relative to the source file directory
|
||||
source_dir = Path(source_file).parent
|
||||
image_path = str(source_dir / relative_path)
|
||||
if not os.path.exists(image_path):
|
||||
image_path = None
|
||||
|
||||
# Generate improved caption
|
||||
current_caption = fig_data.get('original_caption', '')
|
||||
new_caption = self.generate_caption_with_ollama(
|
||||
context['title'],
|
||||
context['content'],
|
||||
fig_id,
|
||||
current_caption,
|
||||
new_caption = self.generate_caption_with_ollama(
|
||||
context['title'],
|
||||
context['content'],
|
||||
fig_id,
|
||||
current_caption,
|
||||
image_path,
|
||||
is_table=False
|
||||
)
|
||||
|
||||
if new_caption and new_caption != current_caption:
|
||||
fig_data['new_caption'] = new_caption
|
||||
)
|
||||
|
||||
if new_caption and new_caption != current_caption:
|
||||
fig_data['new_caption'] = new_caption
|
||||
file_improvements.append({
|
||||
'id': fig_id,
|
||||
'type': 'figure',
|
||||
@@ -2563,33 +2563,33 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
file_improved_count += 1
|
||||
word_count = len(new_caption.split())
|
||||
print(f" ✅ Improved ({word_count} words): {new_caption[:80]}{'...' if len(new_caption) > 80 else ''}")
|
||||
else:
|
||||
print(f" ⚠️ No improvement generated")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {fig_id}: {e}")
|
||||
|
||||
else:
|
||||
print(f" ⚠️ No improvement generated")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {fig_id}: {e}")
|
||||
|
||||
# Process all tables in this file
|
||||
for tbl_id, tbl_data in items['tables']:
|
||||
print(f" 📋 Processing table: {tbl_id}")
|
||||
|
||||
try:
|
||||
# Extract context around this table
|
||||
context = self.extract_section_context(file_content, tbl_id)
|
||||
|
||||
# Generate improved caption (no image for tables)
|
||||
print(f" 📋 Processing table: {tbl_id}")
|
||||
|
||||
try:
|
||||
# Extract context around this table
|
||||
context = self.extract_section_context(file_content, tbl_id)
|
||||
|
||||
# Generate improved caption (no image for tables)
|
||||
current_caption = tbl_data.get('original_caption', '')
|
||||
new_caption = self.generate_caption_with_ollama(
|
||||
context['title'],
|
||||
context['content'],
|
||||
tbl_id,
|
||||
current_caption,
|
||||
new_caption = self.generate_caption_with_ollama(
|
||||
context['title'],
|
||||
context['content'],
|
||||
tbl_id,
|
||||
current_caption,
|
||||
None, # No image for tables
|
||||
is_table=True
|
||||
)
|
||||
|
||||
if new_caption and new_caption != current_caption:
|
||||
tbl_data['new_caption'] = new_caption
|
||||
)
|
||||
|
||||
if new_caption and new_caption != current_caption:
|
||||
tbl_data['new_caption'] = new_caption
|
||||
file_improvements.append({
|
||||
'id': tbl_id,
|
||||
'type': 'table',
|
||||
@@ -2599,12 +2599,12 @@ Instead, write DIRECT, ACTIVE statements:
|
||||
file_improved_count += 1
|
||||
word_count = len(new_caption.split())
|
||||
print(f" ✅ Improved ({word_count} words): {new_caption[:80]}{'...' if len(new_caption) > 80 else ''}")
|
||||
else:
|
||||
print(f" ⚠️ No improvement generated")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {tbl_id}: {e}")
|
||||
|
||||
else:
|
||||
print(f" ⚠️ No improvement generated")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error processing {tbl_id}: {e}")
|
||||
|
||||
# Immediately update this file if we have improvements
|
||||
if file_improvements:
|
||||
print(f" ✏️ Updating file with {file_improved_count} improvements...")
|
||||
@@ -3251,9 +3251,9 @@ Examples:
|
||||
return 0 if success else 1
|
||||
|
||||
# Validate that we have input files/directories for other operations
|
||||
if not args.files and not args.directories:
|
||||
if not args.files and not args.directories:
|
||||
print("❌ Error: --files or --directories required")
|
||||
return 1
|
||||
return 1
|
||||
|
||||
# Determine which files/directories to process
|
||||
directories = []
|
||||
@@ -3289,7 +3289,7 @@ Examples:
|
||||
print("✅ Content map building completed!")
|
||||
|
||||
# Always save JSON for --build-map
|
||||
improver.save_content_map(content_map)
|
||||
improver.save_content_map(content_map)
|
||||
|
||||
# Show extraction report
|
||||
stats = content_map['metadata']['extraction_stats']
|
||||
@@ -3308,8 +3308,8 @@ Examples:
|
||||
print(f" 📋 Tables: {stats['tables_found']} total")
|
||||
print(f" 📁 Files processed: {content_map['metadata']['qmd_files_scanned']}")
|
||||
|
||||
print(f"\n💾 Content map saved to: content_map.json")
|
||||
print(f"📄 You can now review the complete JSON structure!")
|
||||
print(f"\n💾 Content map saved to: content_map.json")
|
||||
print(f"📄 You can now review the complete JSON structure!")
|
||||
|
||||
else:
|
||||
print("❌ Content map building failed!")
|
||||
@@ -3347,7 +3347,7 @@ Examples:
|
||||
if content_map and args.save_json:
|
||||
improver.save_content_map(content_map)
|
||||
print("💾 Repaired content map saved to content_map.json")
|
||||
print("✅ Caption repair completed!")
|
||||
print("✅ Caption repair completed!")
|
||||
|
||||
elif args.improve:
|
||||
# LLM caption improvement mode (explicit)
|
||||
@@ -3358,7 +3358,7 @@ Examples:
|
||||
if not improved_content_map:
|
||||
return 1
|
||||
|
||||
else:
|
||||
else:
|
||||
# Default: Same as --improve (LLM improvement)
|
||||
print("🚀 Improving captions with LLM (default mode)...")
|
||||
improved_content_map = improver.complete_caption_improvement_workflow(directories, args.save_json,
|
||||
|
||||
Reference in New Issue
Block a user