# Source file: cs249r_book/book/tools/scripts/genai/footnote_assistant.py
# (repository listing: 1168 lines, 48 KiB, Python)

import argparse
import time
import os
import json
import re
import gradio as gr
import logging
# Import client libraries
from openai import OpenAI
from groq import Groq
# Send log records to both the console and a log file in the working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler("footnote_assistant.log"),
    ],
)

# LLM client state shared by the whole module; main() assigns the real values
# once the command-line arguments have been parsed.
client = api_provider = model_name = None
# --- Parse document and extract sections and headers ---
# A Markdown header: one or more '#', whitespace, then the header text.
_HEADER_RE = re.compile(r'^(#+)\s+(.*?)$')
# Opening/closing fence of a code block (``` or ~~~, three or more characters).
_FENCE_RE = re.compile(r'^(`{3,}|~{3,})')


def _header_line_indices(lines):
    """Return the indices of lines that are Markdown headers.

    Lines inside fenced code blocks are ignored so that ``# comment`` lines in
    code cells are not mistaken for section headers.
    """
    indices = []
    open_fence = None  # fence string that opened the current code block, if any
    for i, line in enumerate(lines):
        stripped = line.strip()
        fence_match = _FENCE_RE.match(stripped)
        if fence_match:
            fence = fence_match.group(1)
            if open_fence is None:
                open_fence = fence
            elif fence[0] == open_fence[0] and len(fence) >= len(open_fence):
                # A fence only closes a block opened with the same character
                # and at least as many repetitions.
                open_fence = None
            continue
        if open_fence is None and _HEADER_RE.match(stripped):
            indices.append(i)
    return indices


def parse_qmd_sections(text):
    """Split a QMD/Markdown document into header-delimited sections.

    Args:
        text: Full document text.

    Returns:
        A ``(sections, headers, prologue)`` tuple where

        * ``sections`` is a list of strings, each starting at a header line and
          running up to (not including) the next header line;
        * ``headers`` is a list of ``{"text", "level", "index"}`` dicts, one per
          section, with ``index`` pointing into ``sections``;
        * ``prologue`` is everything before the first header, stripped of
          surrounding whitespace.

        When the document contains no header at all, ``sections`` and
        ``headers`` are empty and ``prologue`` holds the whole (stripped) text,
        so that no content is lost if the document is written back out.
    """
    logging.info("Parsing QMD sections")
    lines = text.splitlines()
    starts = _header_line_indices(lines)

    if not starts:
        logging.info("Found 0 sections")
        return [], [], text.strip()

    prologue = "\n".join(lines[:starts[0]]).strip()

    # Each section spans from its header line to the line before the next header.
    bounds = starts + [len(lines)]
    sections = ["\n".join(lines[begin:end]) for begin, end in zip(bounds, bounds[1:])]

    # Build the outline entries from each section's header line.
    headers = []
    for index, start in enumerate(starts):
        header_match = _HEADER_RE.match(lines[start].strip())
        headers.append({
            "text": header_match.group(2).strip(),
            "level": len(header_match.group(1)),
            "index": index,
        })

    logging.info(f"Found {len(sections)} sections")
    return sections, headers, prologue
# --- Replace section text in full file ---
def replace_section(full_text, old, new):
    """Return ``full_text`` with every occurrence of ``old`` replaced by ``new``.

    If ``old`` is empty or does not occur in ``full_text``, a warning is logged
    and ``full_text`` is returned unchanged.  The empty check matters because
    ``str.replace("", new)`` would insert ``new`` between every character.
    """
    if not old or old not in full_text:
        logging.warning(
            "Could not find section to replace. First 50 chars of section: %s",
            (old or "")[:50],
        )
        return full_text
    return full_text.replace(old, new)
# --- Get LLM footnote suggestions ---
def _write_debug_file(path, content):
    """Best-effort dump of ``content`` to ``path`` for debugging.

    Failures are logged and swallowed: a debug dump must never abort or
    invalidate the actual LLM request.
    """
    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
    except OSError as e:
        logging.warning(f"Could not write debug file {path}: {e}")


def get_footnote_suggestions(section_text, prompt_template):
    """Ask the configured LLM for footnote suggestions for one section.

    Uses the module-level ``client``, ``api_provider`` and ``model_name``.

    Args:
        section_text: Text of the section to analyze.
        prompt_template: Prompt text.  If it contains the literal ``{text}``
            placeholder, the section is substituted there; otherwise the
            section is appended after the prompt.

    Returns:
        The raw message content returned by the model.  On any API error (or
        an empty reply) a JSON string with an empty ``"footnotes"`` list is
        returned instead.

    Side effects:
        Writes ``last_prompt_sent.txt`` and ``last_api_response.txt`` in the
        working directory as debugging aids.
    """
    logging.info(f"Getting footnote suggestions from {api_provider} LLM using model {model_name}")

    # Plain substitution instead of str.format(), so braces in the prompt
    # (e.g. JSON examples) do not need escaping.
    if "{text}" in prompt_template:
        complete_prompt = prompt_template.replace("{text}", section_text)
    else:
        complete_prompt = prompt_template + "\n\nText to analyze:\n" + section_text

    _write_debug_file("last_prompt_sent.txt", complete_prompt)

    messages = [
        {"role": "system", "content": "You are an academic footnote assistant. Your response must be valid JSON only."},
        {"role": "user", "content": complete_prompt}
    ]
    empty_result = json.dumps({"footnotes": []})

    logging.info(f"Sending request to {api_provider} API")
    try:
        # Both supported providers are called through the same
        # chat-completions method on the global client set at startup.
        if str(api_provider).lower() not in ("openai", "groq"):
            raise ValueError(f"Unsupported API provider: {api_provider}")
        response = client.chat.completions.create(
            model=model_name,
            messages=messages
        )
        content = response.choices[0].message.content
    except Exception as e:
        logging.error(f"API error: {e}")
        return empty_result

    if content is None:
        logging.error("API error: response contained no message content")
        return empty_result

    _write_debug_file("last_api_response.txt", content)
    logging.info(f"Received response: {content[:100]}...")
    return content
# Inline style of the highlighted footnote marker shown in the HTML views.
_MARKER_SPAN_STYLE = (
    "color: #2E86C1; font-weight: bold; background-color: #EBF5FB; "
    "border-radius: 3px; padding: 0 2px;"
)


def _escape_html(text):
    """Escape ``&``, ``<`` and ``>`` so ``text`` is displayed literally in HTML."""
    return str(text).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def _marker_span(marker):
    """Return the highlighted HTML span for a footnote marker."""
    return f'<span style="{_MARKER_SPAN_STYLE}">{_escape_html(marker)}</span>'


def _insert_marker_html(escaped_para, insert_after, marker):
    """Insert a highlighted marker after the first occurrence of a phrase.

    ``escaped_para`` is already HTML-escaped, so the phrase is escaped the same
    way before matching.  If the phrase is directly followed by punctuation the
    marker goes between phrase and punctuation; otherwise it is placed right
    after the phrase, unless a ``[^`` footnote reference already follows on the
    same line.  Returns the paragraph unchanged when nothing matched.
    """
    phrase = re.escape(_escape_html(insert_after))
    span = _marker_span(marker)
    # Replacement functions are used instead of templates so that backslashes
    # in the marker are never interpreted as regex escapes.
    punctuated = re.compile(rf"({phrase})([.,;:!?])")
    if punctuated.search(escaped_para):
        return punctuated.sub(lambda m: m.group(1) + span + m.group(2), escaped_para, count=1)
    bare = re.compile(rf"({phrase})(?![^\n]*\[\^)")
    return bare.sub(lambda m: m.group(1) + span, escaped_para, count=1)


def _selected_footnote_indices(selected_options, total):
    """Map checkbox labels like ``"1. [^fn-x]: text"`` to 0-based indices.

    Labels without a leading number and numbers outside ``1..total`` are
    ignored (the latter with a warning).
    """
    indices = []
    for option in selected_options:
        match = re.match(r'^(\d+)\.', option)
        if not match:
            continue
        idx = int(match.group(1)) - 1  # display is 1-based, code is 0-based
        if 0 <= idx < total:
            indices.append(idx)
        else:
            logging.warning(f"Ignoring out-of-range footnote selection: {option[:50]}")
    return indices


def show_section_with_markers(section_text, all_footnotes):
    """Show the section with colored footnote markers for all possible footnotes.

    Args:
        section_text: Raw section text.
        all_footnotes: List of dicts with at least ``"insert_after"`` and
            ``"marker"`` keys.

    Returns:
        HTML string: the escaped section text with a highlighted marker after
        each footnote's ``insert_after`` phrase, paragraphs separated by
        ``<br><br>`` and remaining newlines turned into ``<br>``.
    """
    if not all_footnotes:
        return _escape_html(section_text).replace("\n", "<br>")

    modified_paragraphs = []
    for para in re.split(r'\n\s*\n', section_text):
        escaped_para = _escape_html(para)
        for fn in all_footnotes:
            insert_after = fn["insert_after"]
            marker = fn["marker"]
            # Nothing sensible can be anchored on an empty phrase
            if not isinstance(insert_after, str) or not insert_after.strip():
                continue
            escaped_phrase = _escape_html(insert_after)
            if escaped_para.strip().startswith(escaped_phrase):
                # Phrase opens the paragraph: put the marker directly after it
                escaped_para = escaped_para.replace(
                    escaped_phrase, escaped_phrase + _marker_span(marker), 1
                )
                continue
            escaped_para = _insert_marker_html(escaped_para, insert_after, marker)
        modified_paragraphs.append(escaped_para)

    html_text = "<br><br>".join(modified_paragraphs)
    return html_text.replace("\n", "<br>")


# --- Show preview with colored footnote markers for selected footnotes ---
def show_preview_with_markers(section_text, selected_options, all_footnotes):
    """Show the preview with colored markers for the selected footnotes.

    Args:
        section_text: Raw section text.
        selected_options: Checkbox labels of the form ``"1. [^fn-x]: text"``.
        all_footnotes: List of dicts with ``"insert_after"``, ``"marker"`` and
            ``"footnote_text"`` keys.

    Returns:
        HTML string in which each selected footnote's marker is inserted into
        the first paragraph containing its phrase, and the footnote texts are
        listed in an indented box below that paragraph.  A footnote is listed
        only if its marker was actually inserted.
    """
    plain_preview = _escape_html(section_text).replace("\n", "<br>")
    if not all_footnotes:
        return plain_preview
    selected_indices = _selected_footnote_indices(selected_options, len(all_footnotes))
    if not selected_indices:
        return plain_preview

    paragraphs = re.split(r'\n\s*\n', section_text)
    escaped_paragraphs = [_escape_html(para) for para in paragraphs]
    paragraph_footnotes = [[] for _ in paragraphs]

    for idx in selected_indices:
        try:
            fn = all_footnotes[idx]
            insert_after = fn["insert_after"]
            marker = fn["marker"]
            footnote_text = fn["footnote_text"]
        except (KeyError, TypeError) as e:
            logging.error(f"Error applying footnote {idx}: {e}")
            continue
        if not isinstance(insert_after, str) or not insert_after.strip():
            continue

        # Only the first paragraph containing the phrase is considered
        for i, paragraph in enumerate(paragraphs):
            if insert_after not in paragraph:
                continue
            updated = _insert_marker_html(escaped_paragraphs[i], insert_after, marker)
            if updated != escaped_paragraphs[i]:
                escaped_paragraphs[i] = updated
                paragraph_footnotes[i].append(
                    f"<span style='color: #2E86C1; font-weight: bold;'>{_escape_html(marker)}</span>: "
                    f"{_escape_html(footnote_text)}"
                )
            break

    # Assemble paragraphs with their footnotes
    modified_paragraphs = []
    for para, footnotes in zip(escaped_paragraphs, paragraph_footnotes):
        if footnotes:
            footnote_html = "<div style='padding-left: 20px; margin-top: 10px; margin-bottom: 10px; border-left: 2px solid #ccc;'>"
            footnote_html += "<br>".join(footnotes)
            footnote_html += "</div>"
            modified_paragraphs.append(f"{para}{footnote_html}")
        else:
            modified_paragraphs.append(para)

    return "<br><br>".join(modified_paragraphs)
def _paragraphs_inside_divs(paragraphs):
    """Return the indices of paragraphs that lie (partly) inside ``:::`` div blocks.

    Every line starting with ``:::`` toggles the "inside a div" state; a
    paragraph is flagged if any of its lines is seen while that state is on.
    Working on the already-split paragraphs keeps the indices aligned with the
    caller's list regardless of how many blank lines separate paragraphs.
    """
    inside = set()
    in_div_block = False
    for i, para in enumerate(paragraphs):
        for line in para.split('\n'):
            if line.strip().startswith(':::'):
                in_div_block = not in_div_block
            if in_div_block:
                inside.add(i)
    return inside


def apply_footnotes(section_text, selected_options, all_footnotes, global_footnote_set):
    """
    Apply selected footnotes to the section, ensuring no duplicates across the document.
    IMPORTANT: Skips footnotes that would be inserted inside ::: div blocks.

    Args:
        section_text: Raw section text.
        selected_options: Checkbox labels of the form ``"1. [^fn-x]: text"``.
        all_footnotes: List of dicts with ``"insert_after"``, ``"marker"`` and
            ``"footnote_text"`` keys.
        global_footnote_set: Set of markers already used in the document.
            Markers applied here are added to it (mutated in place).

    Returns:
        The section text with markers inserted after their phrases and the
        footnote definitions placed after the paragraph they belong to.  The
        result always ends with a blank line.  If nothing is selected the
        original text is returned untouched.
    """
    selected_indices = []
    for opt in selected_options:
        number = re.match(r'^(\d+)\.', opt)
        if number:
            selected_indices.append(int(number.group(1)) - 1)
    if not all_footnotes or not selected_indices:
        return section_text

    paragraphs = re.split(r'\n\s*\n', section_text)
    paragraph_footnotes = [[] for _ in paragraphs]
    div_paragraph_indices = _paragraphs_inside_divs(paragraphs)

    for idx in selected_indices:
        # Reject out-of-range numbers explicitly; a negative index would
        # otherwise silently wrap around to the end of the list.
        if not 0 <= idx < len(all_footnotes):
            logging.error(f"Error applying footnote {idx}: index out of range")
            continue
        try:
            fn = all_footnotes[idx]
            insert_after = fn["insert_after"]
            marker = fn["marker"]
            footnote_text = fn["footnote_text"]
        except (KeyError, TypeError) as e:
            logging.error(f"Error applying footnote {idx}: {e}")
            continue
        if not isinstance(insert_after, str) or not insert_after.strip():
            logging.warning(f"Skipping footnote '{marker}' - empty insert_after phrase")
            continue

        # Check if the footnote marker was already added before in the document
        if marker in global_footnote_set:
            logging.info(f"Skipping duplicate footnote marker: {marker}")
            continue

        phrase = re.escape(insert_after)
        punctuated = re.compile(rf"({phrase})([.,;:!?])")
        # Do not add a marker when a footnote reference already follows on the line
        bare = re.compile(rf"({phrase})(?![^\n]*\[\^)")

        found = False
        for i, paragraph in enumerate(paragraphs):
            if insert_after not in paragraph:
                continue
            if i in div_paragraph_indices:
                logging.warning(f"Skipping footnote '{marker}' - would be inserted inside div block (paragraph {i+1})")
                continue

            # Replacement functions keep backslashes in the marker literal.
            if punctuated.search(paragraph):
                # Marker goes between the phrase and the punctuation
                updated = punctuated.sub(lambda m: f"{m.group(1)}{marker}{m.group(2)}", paragraph, count=1)
            else:
                updated = bare.sub(lambda m: f"{m.group(1)}{marker}", paragraph, count=1)

            if updated == paragraph:
                # No reference was inserted, so adding the definition would
                # leave an orphan footnote; try the next paragraph instead.
                continue

            paragraphs[i] = updated
            paragraph_footnotes[i].append(f"{marker}: {footnote_text}")
            global_footnote_set.add(marker)  # Mark this footnote as used
            found = True
            logging.info(f"Applied footnote {idx} after '{insert_after}' in paragraph {i+1}")
            break

        if not found:
            logging.warning(f"Could not find phrase '{insert_after}' in any paragraph (or phrase is in div block)")

    # Reconstruct paragraphs with their applied footnotes
    modified_paragraphs = []
    last_index = len(paragraphs) - 1
    for i, para in enumerate(paragraphs):
        if not paragraph_footnotes[i]:
            modified_paragraphs.append(para)
            continue
        footnote_block = "\n".join(paragraph_footnotes[i])
        is_image = bool(re.search(r'!\[.*?\]\(.*?\)', para))
        # Extra spacing after footnotes that follow an image or end the section
        if is_image or i == last_index:
            modified_paragraphs.append(f"{para}\n\n{footnote_block}\n\n")
        else:
            modified_paragraphs.append(f"{para}\n\n{footnote_block}\n")

    result = "\n\n".join(modified_paragraphs)
    # Ensure there's proper spacing at the end of the section
    if not result.endswith("\n\n"):
        result = result.rstrip() + "\n\n"
    return result
# --- Gradio GUI ---
# Application-wide stylesheet passed to gr.Blocks(css=...).
_APP_CSS = """
/* Aggressively target all possible spacing sources */
.outline-btn {
text-align: left !important;
justify-content: flex-start !important;
padding: 0 8px !important; /* Zero vertical padding */
margin: 0 !important;
font-size: 0.9em !important;
background: none !important;
border: none !important;
box-shadow: none !important;
height: 16px !important; /* Extremely small height */
min-height: 0 !important;
color: #333 !important;
border-radius: 0 !important; /* Remove border radius */
font-weight: normal !important;
line-height: 1 !important;
display: block !important;
}
/* Target absolutely everything that could add space */
.outline-sidebar *,
.outline-sidebar > *,
.outline-sidebar > * > *,
.outline-sidebar > * > * > *,
.outline-sidebar button,
.outline-sidebar div {
margin: 0 !important;
padding: 0 !important;
line-height: 1 !important;
}
/* Target Gradio's button container classes specifically */
.outline-sidebar [class*="block"],
.outline-sidebar [class*="Block"],
.outline-sidebar [class*="container"],
.outline-sidebar [class*="Container"] {
margin: 0 !important;
padding: 0 !important;
display: block !important;
}
/* Force buttons to be butted against each other */
.outline-sidebar button + div,
.outline-sidebar div + button {
margin-top: -1px !important; /* Negative margin to collapse any remnant space */
}
/* Remove any default button styles from Gradio */
.outline-sidebar button {
border: none !important;
background-image: none !important;
box-shadow: none !important;
transition: none !important;
}
/* Target grandparent containers */
.outline-sidebar > div > div {
padding-top: 0 !important;
padding-bottom: 0 !important;
margin-top: 0 !important;
margin-bottom: 0 !important;
}
/* Force compact layout */
.outline-sidebar * {
line-height: 1 !important;
}
.container { width: 100%; }
.main-container { display: flex; }
.outline-sidebar {
width: 120px !important; /* Even narrower */
border-right: 1px solid #ddd;
min-height: 500px;
overflow-x: hidden;
}
.content-area { flex-grow: 1; padding: 0 15px; }
.footnote-box { max-height: 300px; overflow-y: auto; border: 1px solid #ddd; border-radius: 5px; padding: 10px; }
.progress-bar { margin-bottom: 15px; width: 100%; }
.section-display { background-color: white; border: 1px solid #ddd; padding: 15px; border-radius: 5px; }
.preview-box { background-color: white; border: 1px solid #ddd; padding: 15px; border-radius: 5px; }
.marker { color: #2E86C1; font-weight: bold; background-color: #EBF5FB; border-radius: 3px; padding: 0 2px; }
.footnote-select { margin-bottom: 10px; }
.button-row { display: flex; justify-content: space-between; align-items: center; margin-top: 15px; }
.button-row button { margin: 0 5px; }
.status-message { color: #2E86C1; font-weight: bold; text-align: center; padding: 5px; }
"""

# Forces the footnote checkboxes to render as a vertical list.
_CHECKBOX_LIST_CSS = """
<style>
/* Force checkboxes to display as a vertical list */
.footnote-box > div > div {
display: flex !important;
flex-direction: column !important;
}
/* Force each checkbox item to be full width */
.footnote-box > div > div > label {
width: 100% !important;
margin-bottom: 8px !important;
padding-bottom: 5px !important;
border-bottom: 1px solid #eee !important;
}
/* Ensure the checkbox container doesn't use grid layout */
.footnote-box .gr-form,
.footnote-box .gr-form > div,
.footnote-box .gr-panel {
display: block !important;
}
/* Target any grid layouts and override them */
.footnote-box [class*="grid"],
.footnote-box [style*="grid"] {
display: flex !important;
flex-direction: column !important;
}
</style>
"""

# Compact styling for the outline buttons in the sidebar.
_OUTLINE_SIDEBAR_CSS = """
<style>
/* Target the button containers to remove extra space */
.outline-sidebar > div,
.outline-sidebar > div > div {
margin: 0 !important;
padding: 0 !important;
}
/* Make outline buttons ultra-compact */
.outline-btn {
text-align: left !important;
justify-content: flex-start !important;
padding: 1px 8px !important; /* Minimal vertical padding */
margin: 0 !important; /* No margins */
font-size: 0.9em !important;
background: none !important;
border: none !important;
box-shadow: none !important;
height: 20px !important; /* Fixed small height */
min-height: unset !important; /* Override min-height */
color: #333 !important;
border-radius: 3px !important;
font-weight: normal !important;
line-height: 1 !important; /* Minimal line height */
display: block !important;
overflow: hidden !important;
text-overflow: ellipsis !important;
white-space: nowrap !important;
}
.outline-btn:hover {
background-color: #f0f0f0 !important;
}
/* Ensure no extra space between buttons */
.outline-sidebar button + button,
.outline-sidebar div + div {
margin-top: 0 !important;
}
/* Compact any button container elements */
.outline-sidebar div[class*="container"],
.outline-sidebar div[class*="Container"] {
margin: 0 !important;
padding: 0 !important;
}
/* Hide any decorative elements that might add space */
.outline-sidebar div[class*="block"],
.outline-sidebar div[class*="Block"] {
margin: 0 !important;
padding: 0 !important;
}
/* Target the HTML components that add indentation styling */
.outline-sidebar > div > div:has(style) {
margin: 0 !important;
padding: 0 !important;
height: 0 !important;
overflow: hidden !important;
}
</style>
"""

# Removes spacing inside the container that holds the outline buttons.
_OUTLINE_CONTAINER_CSS = """
<style>
.outline-buttons-container > div {
margin: 0 !important;
padding: 0 !important;
}
</style>
"""


def _outline_button_css(index, indent):
    """Return the <style> snippet that indents one outline button by ``indent`` px."""
    return (
        "\n<style>\n"
        f"#outline-btn-{index} {{\n"
        f"margin-left: {indent}px !important;\n"
        f"width: calc(100% - {indent}px) !important;\n"
        "margin-top: 0 !important;\n"
        "margin-bottom: 0 !important;\n"
        "}\n"
        "</style>\n"
    )


def launch_gui(sections, headers, original_text, prompt_template, output_path, prologue):
    """Build and launch the Gradio review interface.

    Args:
        sections: List of section strings (as returned by parse_qmd_sections).
        headers: List of ``{"text", "level", "index"}`` dicts used for the
            outline sidebar.
        original_text: Full original document text (currently unused here).
        prompt_template: Prompt text passed to get_footnote_suggestions.
        output_path: Path of the input file; the result is saved next to it
            with ``-footnoted`` inserted before the extension.
        prologue: Text preceding the first header, written back on save.

    Blocks until the Gradio server is closed.
    """
    with gr.Blocks(css=_APP_CSS) as demo:
        # Extra CSS to force vertical display of checkboxes
        gr.HTML(_CHECKBOX_LIST_CSS)

        # State shared between event handlers
        current_section = gr.State(0)  # Current section index
        cached_footnotes = gr.State({})  # Cache footnotes by section
        cached_selections = gr.State({})  # Cache selected options by section
        updated_sections = gr.State(sections.copy())  # All updated sections
        applied_sections = gr.State(set())  # Track which sections have been applied

        # Hidden dropdown for JavaScript to use
        section_dropdown = gr.Dropdown(
            choices=list(range(len(sections))),
            value=0,
            label="Section",
            interactive=True,
            visible=False,
            elem_id="section-dropdown"
        )
        gr.Markdown(f"## Academic Footnote Assistant (Using {api_provider} API with model {model_name})")

        # Main container with sidebar and content
        with gr.Row(elem_classes=["main-container"]):
            # Left sidebar for outline
            with gr.Column(scale=2, min_width=100, elem_classes=["outline-sidebar"]):
                gr.Markdown("### Document Outline")
                # Custom CSS to fix button styling with minimal spacing
                gr.HTML(_OUTLINE_SIDEBAR_CSS)

                # Create all buttons in a single container to avoid spacing
                with gr.Column(elem_classes=["outline-buttons-container"]):
                    gr.HTML(_OUTLINE_CONTAINER_CSS)

                    def make_click_handler(idx):
                        # Bind idx now; a plain closure over the loop variable
                        # would see only its final value.
                        def click_handler():
                            return idx
                        return click_handler

                    # Create buttons for each header
                    for header in headers:
                        indent = 10 * (header["level"] - 1)
                        # Truncate long header text
                        display_text = header["text"]
                        if len(display_text) > 30:
                            display_text = display_text[:27] + "..."
                        btn = gr.Button(
                            display_text,
                            elem_classes=["outline-btn"],
                            elem_id=f"outline-btn-{header['index']}",
                            size="sm"
                        )
                        # Per-button CSS providing the indentation
                        gr.HTML(_outline_button_css(header['index'], indent))
                        # Connect the button click to navigation
                        btn.click(
                            fn=make_click_handler(header["index"]),
                            inputs=[],
                            outputs=[current_section]
                        )

            # Right content area for the main UI
            with gr.Column(scale=9, elem_classes=["content-area"]):
                # Section info and progress
                with gr.Row(elem_classes=["container"]):
                    section_info = gr.Markdown("Section 1 of " + str(len(sections)), elem_classes=["status-message"])
                with gr.Row(elem_classes=["progress-bar", "container"]):
                    progress = gr.Slider(minimum=1, maximum=len(sections), value=1, step=1, label="Progress", interactive=False)

                # Section content
                gr.Markdown("<div class='container'><strong>Section with Suggested Footnotes:</strong></div>")
                section_html = gr.HTML(elem_classes=["container", "section-display"])
                # Hidden section text (for reference only)
                section_text = gr.Textbox(visible=False)

                # Footnote selection
                gr.Markdown("<div class='container'><strong>Select Footnotes to Apply:</strong></div>")
                with gr.Column(elem_classes=["container", "footnote-select"]):
                    checkbox_group = gr.CheckboxGroup(
                        choices=[],
                        value=[],
                        label="",
                        elem_classes=["footnote-box"]
                    )

                # Preview
                gr.Markdown("<div class='container'><strong>Preview Result:</strong></div>")
                preview_html = gr.HTML(elem_classes=["container", "preview-box"])

                # Action buttons
                # NOTE(review): the original indentation was lost; all five
                # buttons are assumed to share this row - confirm.
                with gr.Row(elem_classes=["container", "button-row"]):
                    prev_btn = gr.Button("⬅️ Previous")
                    regenerate_btn = gr.Button("🔄 Regenerate")
                    apply_btn = gr.Button("✅ Apply Section")
                    next_btn = gr.Button("Next ➡️")
                    save_btn = gr.Button("💾 Save & Exit", variant="primary", size="lg")

                # Status display
                status_display = gr.Markdown("", elem_classes=["container", "status-message"])

        def build_checkbox_choices(footnotes):
            # Format like actual footnotes: "N. [^marker]: text"
            return [f"{i+1}. {fn['marker']}: {fn['footnote_text']}" for i, fn in enumerate(footnotes)]

        # Process LLM response for a section - used by both load_section and regenerate
        def process_llm_response(section_idx, section_text_value, raw_response, cached_footnotes_dict, cached_selections_dict):
            try:
                # Try to extract JSON if it's embedded in a ```json markdown fence
                json_match = re.search(r'```json\s*(\{.*?\})\s*```', raw_response, re.DOTALL)
                if json_match:
                    logging.info("Found JSON embedded in markdown, extracting...")
                    data = json.loads(json_match.group(1))
                else:
                    data = json.loads(raw_response)
                footnotes = data.get("footnotes", [])
                logging.info(f"Parsed {len(footnotes)} footnotes from LLM response")

                # Keep only well-formed footnote entries
                required_keys = ["marker", "insert_after", "footnote_text"]
                valid_footnotes = []
                for i, fn in enumerate(footnotes):
                    if isinstance(fn, dict) and all(key in fn for key in required_keys):
                        valid_footnotes.append(fn)
                    else:
                        logging.warning(f"Footnote {i} missing required keys, skipping")

                checkbox_choices = build_checkbox_choices(valid_footnotes)
                section_with_markers = show_section_with_markers(section_text_value, valid_footnotes)

                # Clear any previous selections for this section when regenerating.
                # The state dict is mutated in place on purpose: it is not among
                # the outputs of the handlers that call this function.
                if str(section_idx) in cached_selections_dict:
                    del cached_selections_dict[str(section_idx)]

                # Update the cache
                new_footnotes_dict = dict(cached_footnotes_dict)
                new_footnotes_dict[str(section_idx)] = valid_footnotes

                return {
                    'section_html': section_with_markers,
                    'checkbox_choices': checkbox_choices,
                    'checkbox_value': [],
                    'preview_html': section_text_value.replace("\n", "<br>"),
                    'status_msg': f"Loaded {len(valid_footnotes)} footnote suggestions for section {section_idx + 1}",
                    'valid_footnotes': valid_footnotes,
                    'cached_footnotes': new_footnotes_dict
                }
            except Exception as e:
                logging.error(f"Error parsing LLM response: {e}")
                logging.error(f"Raw response: {raw_response}")
                return {
                    'section_html': f"<p>Error processing footnotes. Please check logs.</p><pre>{section_text_value}</pre>",
                    'checkbox_choices': [],
                    'checkbox_value': [],
                    'preview_html': section_text_value.replace("\n", "<br>"),
                    'status_msg': f"Error loading footnotes for section {section_idx + 1}",
                    'valid_footnotes': [],
                    'cached_footnotes': cached_footnotes_dict
                }

        # Function to load a section
        def load_section(index, cached_footnotes_dict, cached_selections_dict, applied_sections_set):
            logging.info(f"Loading section {index}")
            if index < 0:
                index = 0
            if index >= len(sections):
                return {
                    section_text: "",
                    section_html: "End of document reached.",
                    checkbox_group: gr.update(choices=[], value=[]),
                    preview_html: "",
                    progress: len(sections),
                    section_info: "End of document reached",
                    status_display: "End of document reached. Click 'Save & Exit' to save your changes.",
                }

            section_text_value = sections[index]
            section_key = str(index)

            if section_key in cached_footnotes_dict:
                # Already visited: reuse the cached suggestions and selections
                logging.info(f"Using cached footnotes for section {index}")
                valid_footnotes = cached_footnotes_dict[section_key]
                checkbox_choices = build_checkbox_choices(valid_footnotes)
                section_with_markers = show_section_with_markers(section_text_value, valid_footnotes)

                selected_values = cached_selections_dict.get(section_key, [])
                # Generate preview based on saved selections
                if selected_values:
                    preview = show_preview_with_markers(section_text_value, selected_values, valid_footnotes)
                else:
                    preview = section_text_value.replace("\n", "<br>")

                applied_status = " (Applied)" if index in applied_sections_set else ""
                return {
                    section_text: section_text_value,
                    section_html: section_with_markers,
                    checkbox_group: gr.update(choices=checkbox_choices, value=selected_values),
                    preview_html: preview,
                    progress: index + 1,
                    section_info: f"Section {index + 1} of {len(sections)}{applied_status}",
                    status_display: f"Loaded section {index + 1} with {len(valid_footnotes)} footnote suggestions",
                }

            # First time visiting this section - make the API call
            logging.info(f"First visit to section {index} - making API call")
            raw_response = get_footnote_suggestions(section_text_value, prompt_template)
            result = process_llm_response(index, section_text_value, raw_response,
                                          cached_footnotes_dict, cached_selections_dict)
            return {
                section_text: section_text_value,
                section_html: result['section_html'],
                checkbox_group: gr.update(choices=result['checkbox_choices'], value=result['checkbox_value']),
                preview_html: result['preview_html'],
                progress: index + 1,
                section_info: f"Section {index + 1} of {len(sections)}",
                status_display: result['status_msg'],
                cached_footnotes: result['cached_footnotes'],
            }

        # Function to regenerate footnotes for current section
        def regenerate_footnotes(section_idx, section_content, cached_footnotes_dict, cached_selections_dict):
            logging.info(f"Regenerating footnotes for section {section_idx}")
            raw_response = get_footnote_suggestions(section_content, prompt_template)
            result = process_llm_response(section_idx, section_content, raw_response,
                                          cached_footnotes_dict, cached_selections_dict)
            return {
                section_html: result['section_html'],
                checkbox_group: gr.update(choices=result['checkbox_choices'], value=result['checkbox_value']),
                preview_html: result['preview_html'],
                status_display: f"Regenerated footnotes for section {section_idx + 1}",
                cached_footnotes: result['cached_footnotes']
            }

        # Update preview based on checkbox selections
        def update_preview(selected_options, section_content, footnotes_dict, section_idx):
            section_key = str(section_idx)
            if section_key not in footnotes_dict or not selected_options:
                return section_content.replace("\n", "<br>")
            return show_preview_with_markers(section_content, selected_options, footnotes_dict[section_key])

        # Markers applied anywhere in the document (shared by all sections)
        global_footnote_set = set()
        # Markers contributed by each section, so that re-applying a section
        # does not treat its own earlier markers as duplicates and drop them.
        markers_by_section = {}

        def apply_to_section(section_idx, selected_options, footnotes_dict, updates, cached_selections_dict, applied_sections_set):
            """
            Apply selected footnotes to a section while preventing duplicate footnotes in the entire document.
            """
            section_key = str(section_idx)
            unchanged = {
                updated_sections: updates,
                cached_selections: cached_selections_dict,
                applied_sections: applied_sections_set,
                section_info: f"Section {section_idx + 1} of {len(sections)}",
            }
            if section_key not in footnotes_dict:
                unchanged[status_display] = "No footnotes available for this section"
                return unchanged
            if not selected_options:
                unchanged[status_display] = "No footnotes selected to apply"
                return unchanged

            footnotes_data = footnotes_dict[section_key]
            section_content = sections[section_idx]  # always start from the original text

            # Release the markers this section registered on a previous apply
            global_footnote_set.difference_update(markers_by_section.pop(section_idx, set()))
            markers_before = set(global_footnote_set)
            updated_text = apply_footnotes(section_content, selected_options, footnotes_data, global_footnote_set)
            newly_applied = global_footnote_set - markers_before
            markers_by_section[section_idx] = newly_applied

            # Update stored document state
            updated_sections_copy = updates.copy()
            updated_sections_copy[section_idx] = updated_text
            new_selections = dict(cached_selections_dict)
            new_selections[section_key] = selected_options
            new_applied = set(applied_sections_set)
            new_applied.add(section_idx)
            return {
                updated_sections: updated_sections_copy,
                cached_selections: new_selections,
                applied_sections: new_applied,
                section_info: f"Section {section_idx + 1} of {len(sections)} (Applied)",
                status_display: f"Applied {len(newly_applied)} of {len(selected_options)} selected footnotes to section {section_idx + 1}"
            }

        # Move to previous section
        def prev_section(section_idx):
            prev_idx = section_idx - 1
            if prev_idx < 0:
                return {
                    status_display: "Already at the first section"
                }
            return {
                current_section: prev_idx,
                status_display: f"Moving to section {prev_idx + 1}"
            }

        # Move to next section
        def next_section(section_idx):
            next_idx = section_idx + 1
            if next_idx >= len(sections):
                return {
                    status_display: "End of document reached"
                }
            return {
                current_section: next_idx,
                status_display: f"Moving to section {next_idx + 1}"
            }

        def save_document(updates):
            try:
                logging.info(f"Starting save process with {len(updates)} sections")
                # Rebuild the document from scratch, section by section
                rebuilt_document = []
                changed_sections = []
                for i, (original, updated) in enumerate(zip(sections, updates)):
                    if original != updated:
                        rebuilt_document.append(updated)
                        changed_sections.append(i)
                        logging.info(f"Section {i}: Using modified version")
                    else:
                        rebuilt_document.append(original)
                        logging.info(f"Section {i}: Using original version")
                changes_applied = len(changed_sections)

                # Join all sections; only separate the prologue when there is one,
                # so a document starting with a header gets no leading blank lines.
                body = "\n".join(rebuilt_document)
                full_text = prologue + "\n\n" + body if prologue else body

                # Insert "-footnoted" before the file extension.  splitext only
                # looks at the last path component, so dots in directory names
                # (e.g. "./docs/file") are left alone.
                base, ext = os.path.splitext(output_path)
                output_filename = f"{base}-footnoted{ext}"

                # Write the rebuilt document
                with open(output_filename, "w", encoding="utf-8") as f:
                    f.write(full_text)

                # Write a debug file with just the changed sections for verification
                if changed_sections:
                    debug_filename = f"changes_debug_{int(time.time())}.txt"
                    with open(debug_filename, "w", encoding="utf-8") as f:
                        for idx in changed_sections:
                            f.write(f"=== SECTION {idx} ===\n")
                            f.write(f"ORIGINAL:\n{sections[idx]}\n\n")
                            f.write(f"UPDATED:\n{updates[idx]}\n\n")
                            f.write("="*50 + "\n\n")

                logging.info(f"Saved document with {changes_applied} changed sections to {output_filename}")
                return f"Document saved to {output_filename} with {changes_applied} changes"
            except Exception as e:
                import traceback
                logging.error(f"Save error: {str(e)}")
                logging.error(traceback.format_exc())
                return f"Error: {str(e)}"

        # Set up event handlers
        section_outputs = [section_text, section_html, checkbox_group, preview_html,
                           progress, section_info, status_display, cached_footnotes]

        # Section dropdown (for outline navigation)
        section_dropdown.change(
            fn=lambda x: x,
            inputs=[section_dropdown],
            outputs=[current_section]
        )
        # Regenerate button
        regenerate_btn.click(
            fn=regenerate_footnotes,
            inputs=[current_section, section_text, cached_footnotes, cached_selections],
            outputs=[section_html, checkbox_group, preview_html, status_display, cached_footnotes]
        )
        # Auto-update preview when checkboxes change
        checkbox_group.change(
            fn=update_preview,
            inputs=[checkbox_group, section_text, cached_footnotes, current_section],
            outputs=preview_html
        )
        # Apply footnotes to current section
        apply_btn.click(
            fn=apply_to_section,
            inputs=[current_section, checkbox_group, cached_footnotes, updated_sections, cached_selections, applied_sections],
            outputs=[updated_sections, cached_selections, applied_sections, section_info, status_display]
        )
        # Previous section button
        prev_btn.click(
            fn=prev_section,
            inputs=[current_section],
            outputs=[current_section, status_display]
        )
        # Next section button
        next_btn.click(
            fn=next_section,
            inputs=[current_section],
            outputs=[current_section, status_display]
        )
        # Save button
        save_btn.click(
            fn=save_document,
            inputs=[updated_sections],
            outputs=status_display
        )
        # Handle section changes
        current_section.change(
            fn=load_section,
            inputs=[current_section, cached_footnotes, cached_selections, applied_sections],
            outputs=section_outputs
        )
        # Load the first section on startup
        demo.load(
            fn=lambda: load_section(0, {}, {}, set()),
            inputs=None,
            outputs=section_outputs
        )

    logging.info("Launching Gradio interface")
    demo.launch(share=True)
    logging.info("Gradio interface closed")
def main():
    """Command-line entry point: configure the LLM client and launch the GUI.

    Parses the arguments, sets the module-level ``api_provider``,
    ``model_name`` and ``client``, reads the input document and prompt
    template, splits the document into sections and opens the Gradio
    interface.  Problems (missing API key, unreadable files, a document
    without headers) are logged and printed, after which the function returns
    without launching the GUI.
    """
    global api_provider, model_name, client

    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", required=True, help="Input file path")
    parser.add_argument("--prompt", default="prompt.txt", help="Prompt template file path (default: prompt.txt)")
    parser.add_argument("--api", default="openai", choices=["openai", "groq"], help="API provider (default: openai)")
    parser.add_argument("--model", help="Model name (defaults: gpt-4-turbo for OpenAI, llama3-70b-8192 for Groq)")
    args = parser.parse_args()

    # Pick the provider's default model unless one was given explicitly
    api_provider = args.api.lower()
    default_models = {"openai": "gpt-4-turbo", "groq": "llama3-70b-8192"}
    model_name = args.model or default_models.get(api_provider, "gpt-4")

    logging.info(f"Starting application with file: {args.file}, prompt template: {args.prompt}")
    logging.info(f"Using API provider: {api_provider} with model: {model_name}")

    # The API key comes from the provider-specific environment variable
    api_key_var = "OPENAI_API_KEY" if api_provider == "openai" else "GROQ_API_KEY"
    api_key = os.getenv(api_key_var)
    if not api_key:
        logging.error(f"Error: {api_key_var} environment variable is not set")
        print(f"❌ Error: {api_key_var} environment variable is not set")
        print(f"Please set it by running: export {api_key_var}=your_api_key")
        return

    # Initialize the client based on provider
    client_factories = {"openai": OpenAI, "groq": Groq}
    if api_provider not in client_factories:
        logging.error(f"Unsupported API provider: {api_provider}")
        print(f"❌ Error: Unsupported API provider: {api_provider}")
        return
    client = client_factories[api_provider](api_key=api_key)

    # Read the input file and prompt template.  UTF-8 is used explicitly so
    # reading matches the encoding the result is saved with.
    try:
        with open(args.file, encoding="utf-8") as f:
            text = f.read()
        logging.info(f"Successfully read input file: {args.file}")
        with open(args.prompt, encoding="utf-8") as f:
            prompt = f.read()
        logging.info(f"Successfully read prompt template: {args.prompt}")
    except FileNotFoundError as e:
        logging.error(f"File not found: {e}")
        print(f"Error: {e}")
        return
    except Exception as e:
        logging.error(f"Error reading files: {e}")
        print(f"Error: {e}")
        return

    # Parse sections and headers
    sections, headers, prologue = parse_qmd_sections(text)
    if not sections:
        # Without headers there is nothing to page through in the GUI
        logging.error(f"No Markdown headers found in {args.file}; nothing to annotate")
        print(f"Error: no Markdown headers found in {args.file}")
        return

    launch_gui(sections, headers, text, prompt, args.file, prologue)


if __name__ == "__main__":
    main()