# Mirror of https://github.com/harvard-edge/cs249r_book.git
# Synced 2026-05-03 16:18:49 -05:00 (1168 lines, 48 KiB, Python)
import argparse
|
|
import time
|
|
import os
|
|
import json
|
|
import re
|
|
import gradio as gr
|
|
import logging
|
|
|
|
# Import client libraries
|
|
from openai import OpenAI
|
|
from groq import Groq
|
|
|
|
# Set up logging: every record goes both to the console and to
# "footnote_assistant.log" in the current working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        # Explicit UTF-8: this tool logs document text and raw LLM responses,
        # which routinely contain non-ASCII characters; without an encoding
        # the file handler falls back to the platform default.
        logging.FileHandler("footnote_assistant.log", encoding="utf-8"),
    ],
)

# Initialize client based on command-line choice.
# These module-level globals are read by get_footnote_suggestions() and are
# expected to be set at startup.
client = None        # API client object exposing chat.completions.create(...)
api_provider = None  # provider name; "openai" and "groq" are the supported values
model_name = None    # model identifier passed through to the API call
|
|
|
|
# --- Parse document and extract sections and headers ---
|
|
def parse_qmd_sections(text):
    """Split a QMD/Markdown document into header-delimited sections.

    A header is a line that, once surrounding whitespace is stripped, starts
    with one or more ``#`` followed by whitespace and some text. Each section
    runs from its header line up to (not including) the next header line.

    Lines inside fenced code blocks (backtick or tilde fences) are never
    treated as headers, so a comment such as ``# load data`` inside a code
    chunk no longer splits a section in two.

    Args:
        text: Full document text.

    Returns:
        A ``(sections, headers, prologue)`` tuple where

        * ``sections`` is a list of section strings (lines re-joined with a
          newline character),
        * ``headers`` is a list of ``{"text", "level", "index"}`` dicts, one
          per section, with ``level`` the number of ``#`` characters and
          ``index`` the position of the section in ``sections``,
        * ``prologue`` is the stripped text preceding the first header, or
          ``""`` when the document has no header at all.
    """
    logging.info("Parsing QMD sections")

    header_re = re.compile(r'^(#+)\s+(.*?)$')
    fence_re = re.compile(r'^(`{3,}|~{3,})(.*)$')

    sections = []
    headers = []
    prologue_lines = []
    buffer = []
    open_fence = None  # fence string that opened the current code block

    for line in text.splitlines():
        stripped = line.strip()
        header_match = None
        fence_match = fence_re.match(stripped)

        if open_fence is not None:
            # Inside a code block: only a bare fence of the same character,
            # at least as long as the opener, closes it.
            if (fence_match
                    and fence_match.group(1)[0] == open_fence[0]
                    and len(fence_match.group(1)) >= len(open_fence)
                    and not fence_match.group(2).strip()):
                open_fence = None
        elif fence_match and not (fence_match.group(1)[0] == "`"
                                  and "`" in fence_match.group(2)):
            # A backtick fence whose trailing text contains a backtick is
            # inline code (e.g. ```x```), not the start of a block.
            open_fence = fence_match.group(1)
        else:
            header_match = header_re.match(stripped)

        if header_match:
            if buffer:
                sections.append("\n".join(buffer))
            buffer = [line]
            headers.append({
                "text": header_match.group(2).strip(),
                "level": len(header_match.group(1)),
                # The section being started will land at this position.
                "index": len(sections),
            })
        elif headers:
            buffer.append(line)
        else:
            prologue_lines.append(line)

    if buffer:
        sections.append("\n".join(buffer))

    # Without any header the prologue stays empty, as it always has.
    prologue = "\n".join(prologue_lines).strip() if headers else ""

    logging.info(f"Found {len(sections)} sections")
    return sections, headers, prologue  # Return prologue as well
|
|
|
|
# --- Replace section text in full file ---
|
|
def replace_section(full_text, old, new):
    """Return *full_text* with every occurrence of *old* replaced by *new*.

    Args:
        full_text: The complete document text.
        old: The section text to look for.
        new: The replacement text.

    Returns:
        The updated text, or *full_text* unchanged (with a warning logged)
        when *old* is empty or does not occur in *full_text*.
    """
    # An empty search string is "found" everywhere, and str.replace would then
    # insert *new* between every character of the document.
    if not old:
        logging.warning("Empty section text given; nothing to replace")
        return full_text

    # If old text isn't found, log a warning
    if old not in full_text:
        logging.warning(f"Could not find section to replace. First 50 chars of section: {old[:50]}")
        return full_text

    # Otherwise replace it and return
    return full_text.replace(old, new)
|
|
|
|
# --- Get LLM footnote suggestions ---
|
|
def get_footnote_suggestions(section_text, prompt_template):
    """Ask the configured LLM for footnote suggestions for one section.

    The prompt is built by substituting *section_text* for the literal
    ``{text}`` placeholder in *prompt_template*; when the template has no
    placeholder the section is appended after a "Text to analyze:" line.
    ``str.format`` is deliberately not used, so other braces in the template
    are left alone.

    Uses the module-level ``client``, ``api_provider`` and ``model_name``.
    For debugging, the prompt and the response are written to
    ``last_prompt_sent.txt`` and ``last_api_response.txt`` in the current
    working directory.

    Args:
        section_text: Text of the section to analyze.
        prompt_template: Prompt text, optionally containing ``{text}``.

    Returns:
        The raw message content returned by the API, or the JSON string
        ``{"footnotes": []}`` when the request fails for any reason.
    """
    logging.info(f"Getting footnote suggestions from {api_provider} LLM using model {model_name}")

    def dump_debug_file(path, text):
        # Debug dumps are best-effort: an unwritable directory must not abort
        # the request or throw away a response that was already received.
        # UTF-8 is explicit because prompts and responses contain non-ASCII text.
        try:
            with open(path, "w", encoding="utf-8") as f:
                f.write(text)
        except OSError as e:
            logging.warning(f"Could not write debug file {path}: {e}")

    # Don't use .format() at all - just concatenate the text at the end
    if "{text}" in prompt_template:
        complete_prompt = prompt_template.replace("{text}", section_text)
    else:
        complete_prompt = prompt_template + "\n\nText to analyze:\n" + section_text

    # Save the prompt to a file for debugging
    dump_debug_file("last_prompt_sent.txt", complete_prompt)

    messages = [
        {"role": "system", "content": "You are an academic footnote assistant. Your response must be valid JSON only."},
        {"role": "user", "content": complete_prompt}
    ]

    logging.info(f"Sending request to {api_provider} API")
    try:
        # Both supported providers are called through the same
        # chat.completions.create interface on the global client.
        if api_provider.lower() not in ("openai", "groq"):
            raise ValueError(f"Unsupported API provider: {api_provider}")

        response = client.chat.completions.create(
            model=model_name,
            messages=messages
        )
        content = response.choices[0].message.content

        # Save the response to a file for debugging
        dump_debug_file("last_api_response.txt", content)

        logging.info(f"Received response: {content[:100]}...")
        return content
    except Exception as e:
        # Boundary with the external service: any failure degrades to
        # "no suggestions" instead of propagating into the UI callback.
        logging.error(f"API error: {e}")
        return json.dumps({"footnotes": []})
|
|
|
|
def show_section_with_markers(section_text, all_footnotes):
    """Show the section with colored footnote markers for all possible footnotes.

    The section is split into paragraphs on blank lines. In each paragraph,
    every footnote whose ``insert_after`` phrase occurs gets its ``marker``
    inserted once: right after the phrase when the paragraph starts with it,
    before a directly following punctuation character, or otherwise directly
    after the phrase provided no ``[^`` follows later on the same line.

    Args:
        section_text: Raw section text.
        all_footnotes: List of dicts with at least the keys ``"insert_after"``
            and ``"marker"``. Entries with a blank ``insert_after`` are ignored.

    Returns:
        An HTML string. When *all_footnotes* is empty the text is returned
        with newlines turned into ``<br>`` and no escaping (same as the plain
        previews built elsewhere in this file). Otherwise ``&``, ``<`` and
        ``>`` in the text are escaped, paragraphs are joined with
        ``<br><br>`` and remaining newlines become ``<br>``.
    """
    if not all_footnotes:
        return section_text.replace("\n", "<br>")

    span_open = '<span style="color: #2E86C1; font-weight: bold; background-color: #EBF5FB; border-radius: 3px; padding: 0 2px;">'
    span_close = '</span>'

    # Markers are first inserted wrapped in control-character sentinels and
    # only turned into <span> markup after escaping. This keeps phrase
    # matching on the raw text (so phrases containing < or > still match)
    # and prevents later phrases from matching inside the span's own
    # attributes (e.g. "color", "bold").
    mark_open, mark_close = "\x01", "\x02"

    def escape_html(value):
        # "&" must be handled first so the entities produced below survive.
        return value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    rendered = []
    for para in re.split(r'\n\s*\n', section_text):
        # Drop stray sentinel characters so they cannot be mistaken for markers.
        work = para.replace(mark_open, "").replace(mark_close, "")

        for fn in all_footnotes:
            insert_after = fn["insert_after"]
            marker = fn["marker"]

            # Skip if insert_after is empty
            if not insert_after.strip():
                continue

            token = f"{mark_open}{marker}{mark_close}"

            # Phrase at the beginning of the paragraph: marker goes directly after it
            if work.strip().startswith(insert_after):
                work = work.replace(insert_after, insert_after + token, 1)
                continue

            phrase = re.escape(insert_after)
            punct_pattern = re.compile(rf"({phrase})([.,;:!?])")

            # Callable replacements are used so the marker text is never
            # interpreted as a regex replacement template.
            if punct_pattern.search(work):
                # Insert the marker before the punctuation
                work = punct_pattern.sub(
                    lambda m: m.group(1) + token + m.group(2), work, count=1
                )
            else:
                # No punctuation: marker directly after the phrase, unless a
                # footnote reference already follows on the same line.
                bare_pattern = re.compile(rf"({phrase})(?![^\n]*\[\^)")
                work = bare_pattern.sub(
                    lambda m: m.group(1) + token, work, count=1
                )

        html = escape_html(work)
        html = html.replace(mark_open, span_open).replace(mark_close, span_close)
        rendered.append(html)

    # Join paragraphs with proper spacing, then replace remaining newlines
    return "<br><br>".join(rendered).replace("\n", "<br>")
|
|
|
|
# --- Show preview with colored footnote markers for selected footnotes ---
|
|
def show_preview_with_markers(section_text, selected_options, all_footnotes):
    """Show the preview with colored footnote markers for selected footnotes and per-paragraph footnotes.

    Each selected option is expected to start with ``"<n>."`` where ``n`` is
    the 1-based position of the footnote in *all_footnotes*. For every valid
    selection the marker is inserted into the first paragraph that contains
    the ``insert_after`` phrase and accepts the insertion, and the footnote
    text is listed in a block under that paragraph.

    Args:
        section_text: Raw section text.
        selected_options: Checkbox labels of the selected footnotes.
        all_footnotes: List of dicts with the keys ``"insert_after"``,
            ``"marker"`` and ``"footnote_text"``.

    Returns:
        An HTML string. Without footnotes or without any valid selection the
        text is returned unescaped with newlines turned into ``<br>``.
        Otherwise ``&``, ``<`` and ``>`` are escaped and paragraphs are
        joined with ``<br><br>``.
    """
    if not all_footnotes:
        return section_text.replace("\n", "<br>")

    # Extract the indices from the selected checkbox text
    selected_indices = []
    for option in selected_options:
        # Extract the number from the format "1. [^fn-xxx]: text"
        match = re.match(r'^(\d+)\.', option)
        if not match:
            continue
        # Adjust for 1-based indexing in display vs 0-based in code
        idx = int(match.group(1)) - 1
        if 0 <= idx < len(all_footnotes):
            selected_indices.append(idx)
        else:
            # "0." would otherwise become -1 and silently select the last footnote.
            logging.error(f"Error applying footnote {idx}: index out of range")

    if not selected_indices:
        return section_text.replace("\n", "<br>")

    span_open = '<span style="color: #2E86C1; font-weight: bold; background-color: #EBF5FB; border-radius: 3px; padding: 0 2px;">'
    span_close = '</span>'

    # Markers are inserted wrapped in control-character sentinels and turned
    # into <span> markup only after escaping, so matching happens on raw text
    # and never inside previously inserted markup.
    mark_open, mark_close = "\x01", "\x02"

    def escape_html(value):
        # "&" first, so the entities produced afterwards are not re-escaped.
        return value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    # First, identify paragraphs in the text
    paragraphs = re.split(r'\n\s*\n', section_text)
    working = [p.replace(mark_open, "").replace(mark_close, "") for p in paragraphs]
    paragraph_footnotes = [[] for _ in paragraphs]

    # For each selected footnote
    for idx in selected_indices:
        try:
            fn = all_footnotes[idx]
            insert_after = fn["insert_after"]
            marker = fn["marker"]
            # Built before touching any paragraph so a malformed footnote
            # cannot leave a marker without its text.
            footnote_entry = (
                f"<span style='color: #2E86C1; font-weight: bold;'>{escape_html(str(marker))}</span>"
                f": {escape_html(str(fn['footnote_text']))}"
            )

            # A blank phrase is contained in every paragraph; skip it, as
            # show_section_with_markers does.
            if not insert_after.strip():
                continue

            token = f"{mark_open}{marker}{mark_close}"
            phrase = re.escape(insert_after)
            punct_pattern = re.compile(rf"({phrase})([.,;:!?])")
            bare_pattern = re.compile(rf"({phrase})(?![^\n]*\[\^)")

            # Find which paragraph contains this phrase
            for i, paragraph in enumerate(paragraphs):
                if insert_after not in paragraph:
                    continue

                current = working[i]
                # Handle punctuation positioning
                if punct_pattern.search(current):
                    updated, count = punct_pattern.subn(
                        lambda m: m.group(1) + token + m.group(2), current, count=1
                    )
                else:
                    # No punctuation: directly after the phrase, unless a
                    # footnote reference already follows on the same line.
                    updated, count = bare_pattern.subn(
                        lambda m: m.group(1) + token, current, count=1
                    )

                if not count:
                    # Nothing was inserted here; do not list a footnote whose
                    # marker is not visible. Try the next paragraph.
                    continue

                working[i] = updated
                paragraph_footnotes[i].append(footnote_entry)
                break

        except Exception as e:
            # Per-footnote best effort: one bad entry must not break the preview.
            logging.error(f"Error applying footnote {idx}: {e}")

    # Assemble paragraphs with their footnotes
    modified_paragraphs = []
    for i, text in enumerate(working):
        html = escape_html(text).replace(mark_open, span_open).replace(mark_close, span_close)
        if paragraph_footnotes[i]:
            footnote_html = "<div style='padding-left: 20px; margin-top: 10px; margin-bottom: 10px; border-left: 2px solid #ccc;'>"
            footnote_html += "<br>".join(paragraph_footnotes[i])
            footnote_html += "</div>"
            html = f"{html}{footnote_html}"
        modified_paragraphs.append(html)

    # Join all paragraphs with proper spacing
    return "<br><br>".join(modified_paragraphs)
|
|
|
|
def apply_footnotes(section_text, selected_options, all_footnotes, global_footnote_set):
    """
    Apply selected footnotes to the section, ensuring no duplicates across the document.
    IMPORTANT: Skips footnotes that would be inserted inside ::: div blocks.

    Each selected option is expected to start with ``"<n>."`` where ``n`` is
    the 1-based position of the footnote in *all_footnotes*. For every
    selected footnote the marker is inserted once, in the first paragraph
    outside a div block that contains the ``insert_after`` phrase and accepts
    the insertion: before a directly following punctuation character, or
    directly after the phrase provided no ``[^`` follows later on the same
    line. The footnote definition (``marker: footnote_text``) is placed after
    that paragraph.

    Args:
        section_text: Raw section text.
        selected_options: Checkbox labels of the selected footnotes.
        all_footnotes: List of dicts with the keys ``"insert_after"``,
            ``"marker"`` and ``"footnote_text"``.
        global_footnote_set: Set of markers already used in the document.
            Markers found in it are skipped; markers applied here are added
            to it (the set is modified in place).

    Returns:
        *section_text* unchanged when there are no footnotes or no parsable
        selection; otherwise the section rebuilt from its paragraphs (joined
        with one blank line) and ending with a blank line.
    """
    selected_indices = []
    for opt in selected_options:
        match = re.match(r'^(\d+)\.', opt)
        if match:
            selected_indices.append(int(match.group(1)) - 1)

    if not all_footnotes or not selected_indices:
        return section_text

    paragraphs = re.split(r'\n\s*\n', section_text)
    paragraph_footnotes = [[] for _ in paragraphs]

    # Track which paragraphs are inside div blocks. This walks the same
    # paragraphs that are edited below, so the indices cannot drift when
    # paragraphs are separated by more than one blank line. A fence carrying
    # attributes opens a (possibly nested) div; a bare fence closes one.
    div_paragraph_indices = set()
    div_depth = 0
    for i, para in enumerate(paragraphs):
        for line in para.split('\n'):
            stripped = line.strip()
            if stripped.startswith(':::'):
                if stripped.strip(':').strip():
                    div_depth += 1
                elif div_depth:
                    div_depth -= 1
            if div_depth:
                div_paragraph_indices.add(i)

    for idx in selected_indices:
        # "0." would become -1 and silently pick the last footnote.
        if not 0 <= idx < len(all_footnotes):
            logging.error(f"Error applying footnote {idx}: index out of range")
            continue

        try:
            fn = all_footnotes[idx]
            insert_after = fn["insert_after"]
            marker = fn["marker"]
            # Built up front so a malformed footnote cannot leave a marker in
            # the text without its definition.
            footnote_line = f"{marker}: {fn['footnote_text']}"

            # Check if the footnote marker was already added before in the document
            if marker in global_footnote_set:
                logging.info(f"Skipping duplicate footnote marker: {marker}")
                continue  # Skip adding this marker again

            # A blank phrase is contained in every paragraph and has no
            # meaningful position.
            if not insert_after.strip():
                logging.warning(f"Skipping footnote '{marker}' - empty insert_after phrase")
                continue

            phrase = re.escape(insert_after)
            punct_pattern = re.compile(rf"({phrase})([.,;:!?])")
            word_pattern = re.compile(rf"({phrase})(?![^\n]*\[\^)")

            # Find and modify the paragraph where this phrase appears
            found = False
            for i, paragraph in enumerate(paragraphs):
                if insert_after not in paragraph:
                    continue

                # Check if this paragraph is inside a div block
                if i in div_paragraph_indices:
                    logging.warning(f"Skipping footnote '{marker}' - would be inserted inside div block (paragraph {i+1})")
                    continue

                # Handle punctuation positioning. Callable replacements keep
                # the marker from being read as a regex replacement template.
                if punct_pattern.search(paragraph):
                    modified_paragraph, count = punct_pattern.subn(
                        lambda m: f"{m.group(1)}{marker}{m.group(2)}", paragraph, count=1
                    )
                else:
                    modified_paragraph, count = word_pattern.subn(
                        lambda m: f"{m.group(1)}{marker}", paragraph, count=1
                    )

                if not count:
                    # The lookahead rejected every occurrence (a footnote
                    # reference already follows on that line). Adding the
                    # definition anyway would leave it unreferenced.
                    logging.warning(f"Skipping footnote '{marker}' in paragraph {i+1} - no valid insertion point after '{insert_after}'")
                    continue

                paragraphs[i] = modified_paragraph
                paragraph_footnotes[i].append(footnote_line)
                global_footnote_set.add(marker)  # Mark this footnote as used
                found = True
                logging.info(f"Applied footnote {idx} after '{insert_after}' in paragraph {i+1}")
                break

            if not found:
                logging.warning(f"Could not find phrase '{insert_after}' in any paragraph (or phrase is in div block)")

        except Exception as e:
            # Per-footnote best effort: one bad entry must not abort the rest.
            logging.error(f"Error applying footnote {idx}: {e}")

    # Reconstruct paragraphs with their applied footnotes
    modified_paragraphs = []
    last_index = len(paragraphs) - 1
    for i, para in enumerate(paragraphs):
        if not paragraph_footnotes[i]:
            modified_paragraphs.append(para)
            continue

        footnote_text = "\n".join(paragraph_footnotes[i])

        # Check if this paragraph contains an image/figure
        is_image = bool(re.search(r'!\[.*?\]\(.*?\)', para))

        # Add extra spacing after footnotes, especially for images and final paragraphs
        if is_image or i == last_index:
            modified_paragraphs.append(f"{para}\n\n{footnote_text}\n\n")
        else:
            modified_paragraphs.append(f"{para}\n\n{footnote_text}\n")

    # Always ensure proper spacing between sections
    result = "\n\n".join(modified_paragraphs)

    # Ensure there's proper spacing at the end of the section
    if not result.endswith("\n\n"):
        result = result.rstrip() + "\n\n"

    return result
|
|
|
|
# --- Gradio GUI ---
|
|
def launch_gui(sections, headers, original_text, prompt_template, output_path, prologue): # Added prologue parameter
|
|
|
|
with gr.Blocks(css="""
|
|
|
|
/* Aggressively target all possible spacing sources */
|
|
.outline-btn {
|
|
text-align: left !important;
|
|
justify-content: flex-start !important;
|
|
padding: 0 8px !important; /* Zero vertical padding */
|
|
margin: 0 !important;
|
|
font-size: 0.9em !important;
|
|
background: none !important;
|
|
border: none !important;
|
|
box-shadow: none !important;
|
|
height: 16px !important; /* Extremely small height */
|
|
min-height: 0 !important;
|
|
color: #333 !important;
|
|
border-radius: 0 !important; /* Remove border radius */
|
|
font-weight: normal !important;
|
|
line-height: 1 !important;
|
|
display: block !important;
|
|
}
|
|
|
|
/* Target absolutely everything that could add space */
|
|
.outline-sidebar *,
|
|
.outline-sidebar > *,
|
|
.outline-sidebar > * > *,
|
|
.outline-sidebar > * > * > *,
|
|
.outline-sidebar button,
|
|
.outline-sidebar div {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
line-height: 1 !important;
|
|
}
|
|
|
|
/* Target Gradio's button container classes specifically */
|
|
.outline-sidebar [class*="block"],
|
|
.outline-sidebar [class*="Block"],
|
|
.outline-sidebar [class*="container"],
|
|
.outline-sidebar [class*="Container"] {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
display: block !important;
|
|
}
|
|
|
|
/* Force buttons to be butted against each other */
|
|
.outline-sidebar button + div,
|
|
.outline-sidebar div + button {
|
|
margin-top: -1px !important; /* Negative margin to collapse any remnant space */
|
|
}
|
|
|
|
/* Remove any default button styles from Gradio */
|
|
.outline-sidebar button {
|
|
border: none !important;
|
|
background-image: none !important;
|
|
box-shadow: none !important;
|
|
transition: none !important;
|
|
}
|
|
|
|
/* Target grandparent containers */
|
|
.outline-sidebar > div > div {
|
|
padding-top: 0 !important;
|
|
padding-bottom: 0 !important;
|
|
margin-top: 0 !important;
|
|
margin-bottom: 0 !important;
|
|
}
|
|
|
|
/* Force compact layout */
|
|
.outline-sidebar * {
|
|
line-height: 1 !important;
|
|
}
|
|
.container { width: 100%; }
|
|
.main-container { display: flex; }
|
|
|
|
.outline-sidebar {
|
|
width: 120px !important; /* Even narrower */
|
|
border-right: 1px solid #ddd;
|
|
min-height: 500px;
|
|
overflow-x: hidden;
|
|
}
|
|
.content-area { flex-grow: 1; padding: 0 15px; }
|
|
.footnote-box { max-height: 300px; overflow-y: auto; border: 1px solid #ddd; border-radius: 5px; padding: 10px; }
|
|
.progress-bar { margin-bottom: 15px; width: 100%; }
|
|
.section-display { background-color: white; border: 1px solid #ddd; padding: 15px; border-radius: 5px; }
|
|
.preview-box { background-color: white; border: 1px solid #ddd; padding: 15px; border-radius: 5px; }
|
|
.marker { color: #2E86C1; font-weight: bold; background-color: #EBF5FB; border-radius: 3px; padding: 0 2px; }
|
|
.footnote-select { margin-bottom: 10px; }
|
|
.button-row { display: flex; justify-content: space-between; align-items: center; margin-top: 15px; }
|
|
.button-row button { margin: 0 5px; }
|
|
.status-message { color: #2E86C1; font-weight: bold; text-align: center; padding: 5px; }
|
|
""") as demo:
|
|
|
|
# Add this CSS to force vertical display of checkboxes
|
|
gr.HTML("""
|
|
<style>
|
|
/* Force checkboxes to display as a vertical list */
|
|
.footnote-box > div > div {
|
|
display: flex !important;
|
|
flex-direction: column !important;
|
|
}
|
|
|
|
/* Force each checkbox item to be full width */
|
|
.footnote-box > div > div > label {
|
|
width: 100% !important;
|
|
margin-bottom: 8px !important;
|
|
padding-bottom: 5px !important;
|
|
border-bottom: 1px solid #eee !important;
|
|
}
|
|
|
|
/* Ensure the checkbox container doesn't use grid layout */
|
|
.footnote-box .gr-form,
|
|
.footnote-box .gr-form > div,
|
|
.footnote-box .gr-panel {
|
|
display: block !important;
|
|
}
|
|
|
|
/* Target any grid layouts and override them */
|
|
.footnote-box [class*="grid"],
|
|
.footnote-box [style*="grid"] {
|
|
display: flex !important;
|
|
flex-direction: column !important;
|
|
}
|
|
</style>
|
|
""")
|
|
|
|
# Global state
|
|
current_section = gr.State(0) # Current section index
|
|
cached_footnotes = gr.State({}) # Cache footnotes by section
|
|
cached_selections = gr.State({}) # Cache selected options by section
|
|
updated_sections = gr.State(sections.copy()) # All updated sections
|
|
applied_sections = gr.State(set()) # Track which sections have been applied
|
|
|
|
# Hidden dropdown for JavaScript to use
|
|
section_dropdown = gr.Dropdown(
|
|
choices=[i for i in range(len(sections))],
|
|
value=0,
|
|
label="Section",
|
|
interactive=True,
|
|
visible=False,
|
|
elem_id="section-dropdown"
|
|
)
|
|
|
|
gr.Markdown(f"## Academic Footnote Assistant (Using {api_provider} API with model {model_name})")
|
|
|
|
# Main container with sidebar and content
|
|
with gr.Row(elem_classes=["main-container"]):
|
|
# Left sidebar for outline
|
|
with gr.Column(scale=2, min_width=100, elem_classes=["outline-sidebar"]):
|
|
gr.Markdown("### Document Outline")
|
|
|
|
# Add custom CSS to fix button styling with minimal spacing
|
|
gr.HTML("""
|
|
<style>
|
|
/* Target the button containers to remove extra space */
|
|
.outline-sidebar > div,
|
|
.outline-sidebar > div > div {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
}
|
|
|
|
/* Make outline buttons ultra-compact */
|
|
.outline-btn {
|
|
text-align: left !important;
|
|
justify-content: flex-start !important;
|
|
padding: 1px 8px !important; /* Minimal vertical padding */
|
|
margin: 0 !important; /* No margins */
|
|
font-size: 0.9em !important;
|
|
background: none !important;
|
|
border: none !important;
|
|
box-shadow: none !important;
|
|
height: 20px !important; /* Fixed small height */
|
|
min-height: unset !important; /* Override min-height */
|
|
color: #333 !important;
|
|
border-radius: 3px !important;
|
|
font-weight: normal !important;
|
|
line-height: 1 !important; /* Minimal line height */
|
|
display: block !important;
|
|
overflow: hidden !important;
|
|
text-overflow: ellipsis !important;
|
|
white-space: nowrap !important;
|
|
}
|
|
|
|
.outline-btn:hover {
|
|
background-color: #f0f0f0 !important;
|
|
}
|
|
|
|
/* Ensure no extra space between buttons */
|
|
.outline-sidebar button + button,
|
|
.outline-sidebar div + div {
|
|
margin-top: 0 !important;
|
|
}
|
|
|
|
/* Compact any button container elements */
|
|
.outline-sidebar div[class*="container"],
|
|
.outline-sidebar div[class*="Container"] {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
}
|
|
|
|
/* Hide any decorative elements that might add space */
|
|
.outline-sidebar div[class*="block"],
|
|
.outline-sidebar div[class*="Block"] {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
}
|
|
|
|
/* Target the HTML components that add indentation styling */
|
|
.outline-sidebar > div > div:has(style) {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
height: 0 !important;
|
|
overflow: hidden !important;
|
|
}
|
|
</style>
|
|
""")
|
|
|
|
# Create all buttons in a single container to avoid spacing
|
|
with gr.Column(elem_classes=["outline-buttons-container"]):
|
|
gr.HTML("""
|
|
<style>
|
|
.outline-buttons-container > div {
|
|
margin: 0 !important;
|
|
padding: 0 !important;
|
|
}
|
|
</style>
|
|
""")
|
|
|
|
# Create buttons for each header
|
|
for header in headers:
|
|
# Calculate indent
|
|
indent = 10 * (header["level"] - 1)
|
|
|
|
# Truncate long header text
|
|
display_text = header["text"]
|
|
if len(display_text) > 30:
|
|
display_text = display_text[:27] + "..."
|
|
|
|
# Create the button with indentation
|
|
btn = gr.Button(
|
|
display_text,
|
|
elem_classes=["outline-btn"],
|
|
elem_id=f"outline-btn-{header['index']}",
|
|
size="sm"
|
|
)
|
|
|
|
# Add CSS inline for this specific button
|
|
gr.HTML(f"""
|
|
<style>
|
|
#outline-btn-{header['index']} {{
|
|
margin-left: {indent}px !important;
|
|
width: calc(100% - {indent}px) !important;
|
|
margin-top: 0 !important;
|
|
margin-bottom: 0 !important;
|
|
}}
|
|
</style>
|
|
""")
|
|
|
|
# Set up click handler
|
|
def make_click_handler(idx):
    """Build the click callback for one outline button.

    Going through a factory binds *idx* at creation time, so each handler
    returns the value it was created with.
    """
    def click_handler():
        # No inputs; the returned value is the bound index.
        return idx

    return click_handler
|
|
|
|
# Connect the button click to navigation
|
|
btn.click(
|
|
fn=make_click_handler(header["index"]),
|
|
inputs=[],
|
|
outputs=[current_section]
|
|
)
|
|
|
|
# Right content area for the main UI
|
|
with gr.Column(scale=9, elem_classes=["content-area"]):
|
|
# Section info and progress
|
|
with gr.Row(elem_classes=["container"]):
|
|
section_info = gr.Markdown("Section 1 of " + str(len(sections)), elem_classes=["status-message"])
|
|
|
|
with gr.Row(elem_classes=["progress-bar", "container"]):
|
|
progress = gr.Slider(minimum=1, maximum=len(sections), value=1, step=1, label="Progress", interactive=False)
|
|
|
|
# Section content
|
|
gr.Markdown("<div class='container'><strong>Section with Suggested Footnotes:</strong></div>")
|
|
section_html = gr.HTML(elem_classes=["container", "section-display"])
|
|
|
|
# Hidden section text (for reference only)
|
|
section_text = gr.Textbox(visible=False)
|
|
|
|
# Footnote selection
|
|
gr.Markdown("<div class='container'><strong>Select Footnotes to Apply:</strong></div>")
|
|
|
|
with gr.Column(elem_classes=["container", "footnote-select"]):
|
|
checkbox_group = gr.CheckboxGroup(
|
|
choices=[],
|
|
value=[],
|
|
label="",
|
|
elem_classes=["footnote-box"]
|
|
)
|
|
|
|
# Preview
|
|
gr.Markdown("<div class='container'><strong>Preview Result:</strong></div>")
|
|
preview_html = gr.HTML(elem_classes=["container", "preview-box"])
|
|
|
|
# Action buttons
|
|
with gr.Row(elem_classes=["container", "button-row"]):
|
|
prev_btn = gr.Button("⬅️ Previous")
|
|
regenerate_btn = gr.Button("🔄 Regenerate")
|
|
apply_btn = gr.Button("✅ Apply Section")
|
|
next_btn = gr.Button("Next ➡️")
|
|
save_btn = gr.Button("💾 Save & Exit", variant="primary", size="lg")
|
|
|
|
# Status display
|
|
status_display = gr.Markdown("", elem_classes=["container", "status-message"])
|
|
|
|
# Process LLM response for a section - used by both load_section and regenerate
|
|
def process_llm_response(section_idx, section_text_value, raw_response, cached_footnotes_dict, cached_selections_dict):
    """Turn a raw LLM response into the values needed to display a section.

    The response is parsed as JSON, taken from a ```json fenced block when
    one is present, otherwise from the whole response, and as a last resort
    from the outermost ``{...}`` span. Footnote entries that are not dicts
    or lack one of ``marker``, ``insert_after``, ``footnote_text`` are
    dropped with a warning.

    Args:
        section_idx: 0-based index of the section.
        section_text_value: Raw text of the section.
        raw_response: Response text returned by the LLM.
        cached_footnotes_dict: Footnotes cached per section (keyed by the
            index as a string). Not modified; an updated copy is returned.
        cached_selections_dict: Selections cached per section. The entry for
            this section is removed in place on success.

    Returns:
        A dict with the keys ``section_html``, ``checkbox_choices``,
        ``checkbox_value``, ``preview_html``, ``status_msg``,
        ``valid_footnotes`` and ``cached_footnotes``. On any parsing error
        the footnote lists are empty and ``cached_footnotes`` is the
        unmodified input dict.
    """
    required_keys = ("marker", "insert_after", "footnote_text")
    section_key = str(section_idx)

    try:
        # Try to extract JSON if it's embedded in markdown or examples
        json_match = re.search(r'```json\s*(\{.*?\})\s*```', raw_response, re.DOTALL)
        if json_match:
            logging.info("Found JSON embedded in markdown, extracting...")
            data = json.loads(json_match.group(1))
        else:
            try:
                data = json.loads(raw_response)
            except json.JSONDecodeError:
                # Models sometimes wrap the object in prose; fall back to the
                # outermost brace-delimited span before giving up.
                start = raw_response.find("{")
                end = raw_response.rfind("}")
                if start == -1 or end <= start:
                    raise
                data = json.loads(raw_response[start:end + 1])

        footnotes = data.get("footnotes", [])
        logging.info(f"Parsed {len(footnotes)} footnotes from LLM response")

        # Verify footnote structure
        valid_footnotes = []
        for i, fn in enumerate(footnotes):
            # The isinstance check keeps one malformed entry (e.g. a number)
            # from raising and discarding the whole response.
            if isinstance(fn, dict) and all(key in fn for key in required_keys):
                valid_footnotes.append(fn)
            else:
                logging.warning(f"Footnote {i} missing required keys, skipping")

        # Create choices for checkbox - show marker and text,
        # formatted like actual footnotes: [^marker]: text
        checkbox_choices = [
            f"{i+1}. {fn['marker']}: {fn['footnote_text']}"
            for i, fn in enumerate(valid_footnotes)
        ]

        # Generate HTML with colored markers
        section_with_markers = show_section_with_markers(section_text_value, valid_footnotes)

        # Clear any previous selections for this section when regenerating.
        # This is done in place: the selections dict is not part of the result.
        cached_selections_dict.pop(section_key, None)

        # Update the cache
        new_footnotes_dict = dict(cached_footnotes_dict)
        new_footnotes_dict[section_key] = valid_footnotes

        return {
            'section_html': section_with_markers,
            'checkbox_choices': checkbox_choices,
            'checkbox_value': [],
            # Show default preview
            'preview_html': section_text_value.replace("\n", "<br>"),
            'status_msg': f"Loaded {len(valid_footnotes)} footnote suggestions for section {section_idx + 1}",
            'valid_footnotes': valid_footnotes,
            'cached_footnotes': new_footnotes_dict
        }

    except Exception as e:
        logging.error(f"Error parsing LLM response: {e}")
        logging.error(f"Raw response: {raw_response}")

        return {
            'section_html': f"<p>Error processing footnotes. Please check logs.</p><pre>{section_text_value}</pre>",
            'checkbox_choices': [],
            'checkbox_value': [],
            'preview_html': section_text_value.replace("\n", "<br>"),
            'status_msg': f"Error loading footnotes for section {section_idx + 1}",
            'valid_footnotes': [],
            'cached_footnotes': cached_footnotes_dict
        }
|
|
|
|
# Function to load a section
|
|
def load_section(index, cached_footnotes_dict, cached_selections_dict, applied_sections_set):
    """Build the UI updates needed to display the section at *index*.

    A negative index is treated as 0. An index past the last section yields
    an "end of document" display. Footnotes already cached for the section
    are reused together with any cached selections; otherwise the LLM is
    queried and the result is processed and returned along with the updated
    footnote cache.

    Returns a dict mapping UI components to their new values.
    """
    logging.info(f"Loading section {index}")
    total = len(sections)
    index = max(index, 0)

    if index >= total:
        return {
            section_text: "",
            section_html: "End of document reached.",
            checkbox_group: gr.update(choices=[], value=[]),
            preview_html: "",
            progress: total,
            section_info: "End of document reached",
            status_display: "End of document reached. Click 'Save & Exit' to save your changes.",
        }

    body = sections[index]
    key = str(index)

    if key not in cached_footnotes_dict:
        # First time visiting this section - make the API call
        logging.info(f"First visit to section {index} - making API call")
        llm_output = get_footnote_suggestions(body, prompt_template)
        processed = process_llm_response(
            index, body, llm_output, cached_footnotes_dict, cached_selections_dict
        )
        return {
            section_text: body,
            section_html: processed['section_html'],
            checkbox_group: gr.update(
                choices=processed['checkbox_choices'],
                value=processed['checkbox_value'],
            ),
            preview_html: processed['preview_html'],
            progress: index + 1,
            section_info: f"Section {index + 1} of {total}",
            status_display: processed['status_msg'],
            cached_footnotes: processed['cached_footnotes'],
        }

    # We already have footnotes for this section
    logging.info(f"Using cached footnotes for section {index}")
    known_footnotes = cached_footnotes_dict[key]

    # Checkbox labels look like actual footnotes: [^marker]: text
    labels = [
        f"{pos+1}. {note['marker']}: {note['footnote_text']}"
        for pos, note in enumerate(known_footnotes)
    ]

    # Section HTML with colored markers
    marked_html = show_section_with_markers(body, known_footnotes)

    # Restore saved selections, if any, and preview them
    chosen = cached_selections_dict.get(key, [])
    if chosen:
        preview = show_preview_with_markers(body, chosen, known_footnotes)
    else:
        preview = body.replace("\n", "<br>")

    applied_suffix = " (Applied)" if index in applied_sections_set else ""

    return {
        section_text: body,
        section_html: marked_html,
        checkbox_group: gr.update(choices=labels, value=chosen),
        preview_html: preview,
        progress: index + 1,
        section_info: f"Section {index + 1} of {total}{applied_suffix}",
        status_display: f"Loaded section {index + 1} with {len(known_footnotes)} footnote suggestions",
    }
|
|
|
|
# Function to regenerate footnotes for current section
|
|
def regenerate_footnotes(section_idx, section_content, cached_footnotes_dict, cached_selections_dict):
    """Query the LLM again for the current section and rebuild its display.

    Returns a dict mapping UI components (section HTML, checkbox group,
    preview, status line, footnote cache) to their new values. The status
    line always reports the regeneration, whatever the processing result.
    """
    logging.info(f"Regenerating footnotes for section {section_idx}")

    llm_output = get_footnote_suggestions(section_content, prompt_template)
    processed = process_llm_response(
        section_idx,
        section_content,
        llm_output,
        cached_footnotes_dict,
        cached_selections_dict,
    )

    updates = {}
    updates[section_html] = processed['section_html']
    updates[checkbox_group] = gr.update(
        choices=processed['checkbox_choices'],
        value=processed['checkbox_value'],
    )
    updates[preview_html] = processed['preview_html']
    updates[status_display] = f"Regenerated footnotes for section {section_idx + 1}"
    updates[cached_footnotes] = processed['cached_footnotes']
    return updates
|
|
|
|
# Update preview based on checkbox selections
|
|
def update_preview(selected_options, section_content, footnotes_dict, section_idx):
    """Return preview HTML for the current checkbox selection.

    When the section has cached footnotes and something is selected, the
    preview shows the selected markers and footnotes; in every other case it
    is the section text with newlines turned into ``<br>``.
    """
    key = str(section_idx)
    if key in footnotes_dict and selected_options:
        # Generate HTML with selected footnotes
        return show_preview_with_markers(section_content, selected_options, footnotes_dict[key])
    return section_content.replace("\n", "<br>")
|
|
|
|
# Global set to track applied footnotes across sections
|
|
# Handed to apply_footnotes on every call; this block never clears it.
global_footnote_set = set()


def apply_to_section(section_idx, selected_options, footnotes_dict, updates, cached_selections_dict, applied_sections_set):
    """
    Apply the selected footnotes to one section and record the result.

    The section's original text (``sections[section_idx]`` from the enclosing
    scope) is passed to ``apply_footnotes`` together with the shared
    ``global_footnote_set``. The incoming state containers are copied rather
    than mutated.

    Args:
        section_idx: Zero-based index of the section to update.
        selected_options: Footnote choices ticked in the checkbox group.
        footnotes_dict: Footnote cache keyed by the section index as a string.
        updates: Current list of (possibly already modified) section texts.
        cached_selections_dict: Mapping of section key to applied selections.
        applied_sections_set: Set of section indices already applied.

    Returns:
        A dict keyed by the output components wired to the apply button:
        the updated sections, cached selections, applied sections, the
        section info label and a status message.
    """
    cache_key = str(section_idx)
    position_label = f"Section {section_idx + 1} of {len(sections)}"

    def nothing_changed(message):
        # Echo the incoming state back untouched with an explanatory status.
        return {
            updated_sections: updates,
            cached_selections: cached_selections_dict,
            applied_sections: applied_sections_set,
            section_info: position_label,
            status_display: message,
        }

    if cache_key not in footnotes_dict:
        return nothing_changed("No footnotes available for this section")

    section_footnotes = footnotes_dict[cache_key]
    original_text = sections[section_idx]

    if not selected_options:
        return nothing_changed("No footnotes selected to apply")

    annotated_text = apply_footnotes(
        original_text, selected_options, section_footnotes, global_footnote_set
    )

    # Work on copies so the previous state objects stay untouched.
    sections_after = updates.copy()
    sections_after[section_idx] = annotated_text

    selections_after = dict(cached_selections_dict)
    selections_after[cache_key] = selected_options

    applied_after = set(applied_sections_set) | {section_idx}

    return {
        updated_sections: sections_after,
        cached_selections: selections_after,
        applied_sections: applied_after,
        section_info: f"{position_label} (Applied)",
        status_display: f"Applied {len(selected_options)} footnotes to section {section_idx + 1}",
    }
|
|
|
|
|
|
# Move to previous section
|
|
def prev_section(section_idx):
    """Step back one section, if there is one.

    Args:
        section_idx: Zero-based index of the section currently shown.

    Returns:
        A dict keyed by output components: the new ``current_section`` value
        plus a status message, or only a status message when the first
        section is already showing.
    """
    target_idx = section_idx - 1

    if target_idx >= 0:
        return {
            current_section: target_idx,
            status_display: f"Moving to section {target_idx + 1}",
        }

    # Already at index 0: leave current_section alone and just tell the user.
    return {status_display: "Already at the first section"}
|
|
|
|
# Move to next section
|
|
def next_section(section_idx):
    """Step forward one section, if there is one.

    Args:
        section_idx: Zero-based index of the section currently shown.

    Returns:
        A dict keyed by output components: the new ``current_section`` value
        plus a status message, or only a status message when the last entry
        of ``sections`` (from the enclosing scope) is already showing.
    """
    target_idx = section_idx + 1

    if target_idx < len(sections):
        return {
            current_section: target_idx,
            status_display: f"Moving to section {target_idx + 1}",
        }

    # Past the last section: leave current_section alone and just tell the user.
    return {status_display: "End of document reached"}
|
|
|
|
def save_document(updates):
    """Rebuild the document from its sections and write it to a new file.

    Each entry of ``updates`` is compared with the matching entry of
    ``sections``; the rebuilt text is ``prologue`` followed by all sections.
    ``sections``, ``prologue`` and ``output_path`` are read from the
    enclosing scope.

    The output file sits next to ``output_path`` with ``-footnoted`` inserted
    before the extension (``chapter.qmd`` -> ``chapter-footnoted.qmd``). The
    extension is found with ``os.path.splitext`` so that a dot in a directory
    name (``../book/intro``) is never mistaken for an extension separator.

    When at least one section changed, a ``changes_debug_<timestamp>.txt``
    file with the before/after text of each changed section is also written
    to the current working directory.

    Args:
        updates: Section texts in document order, modified or not.

    Returns:
        A status message for the UI: a success summary, or ``"Error: ..."``
        if anything went wrong (the traceback is logged).
    """
    try:
        logging.info(f"Starting save process with {len(updates)} sections")

        rebuilt_document = []
        changed_sections = []  # indices of sections whose text differs

        for i, (original, updated) in enumerate(zip(sections, updates)):
            if original != updated:
                rebuilt_document.append(updated)
                changed_sections.append(i)
                logging.info(f"Section {i}: Using modified version")
            else:
                rebuilt_document.append(original)
                logging.info(f"Section {i}: Using original version")

        changes_applied = len(changed_sections)

        # Only emit the prologue separator when there is a prologue;
        # otherwise the saved file would start with two stray blank lines.
        body_text = "\n".join(rebuilt_document)
        full_text = f"{prologue}\n\n{body_text}" if prologue else body_text

        # splitext only looks at the final path component, unlike a plain
        # rsplit(".") which would split on dots in directory names.
        root, ext = os.path.splitext(output_path)
        output_filename = f"{root}-footnoted{ext}"

        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(full_text)

        # Side-by-side dump of the changed sections for manual verification.
        if changed_sections:
            debug_filename = f"changes_debug_{int(time.time())}.txt"
            with open(debug_filename, "w", encoding="utf-8") as f:
                for idx in changed_sections:
                    f.write(f"=== SECTION {idx} ===\n")
                    f.write(f"ORIGINAL:\n{sections[idx]}\n\n")
                    f.write(f"UPDATED:\n{updates[idx]}\n\n")
                    f.write("=" * 50 + "\n\n")

        logging.info(f"Saved document with {changes_applied} changed sections to {output_filename}")
        return f"Document saved to {output_filename} with {changes_applied} changes"
    except Exception as e:
        # UI boundary: report the failure in the status line instead of
        # crashing the handler, and keep the full traceback in the log.
        logging.exception(f"Save error: {str(e)}")
        return f"Error: {str(e)}"
|
|
|
|
# Set up event handlers
|
|
# Section dropdown (for outline navigation)
|
|
# Outline dropdown: forward the chosen value straight into current_section.
section_dropdown.change(
    fn=lambda chosen: chosen,
    inputs=[section_dropdown],
    outputs=[current_section],
)

# Regenerate button: ask for new suggestions for the section on screen.
regenerate_btn.click(
    fn=regenerate_footnotes,
    inputs=[current_section, section_text, cached_footnotes, cached_selections],
    outputs=[section_html, checkbox_group, preview_html, status_display, cached_footnotes],
)

# Checkbox changes: refresh the preview pane.
checkbox_group.change(
    fn=update_preview,
    inputs=[checkbox_group, section_text, cached_footnotes, current_section],
    outputs=preview_html,
)

# Apply button: commit the ticked footnotes to the current section.
apply_btn.click(
    fn=apply_to_section,
    inputs=[current_section, checkbox_group, cached_footnotes, updated_sections, cached_selections, applied_sections],
    outputs=[updated_sections, cached_selections, applied_sections, section_info, status_display],
)

# Previous / next buttons only move current_section.
prev_btn.click(
    fn=prev_section,
    inputs=[current_section],
    outputs=[current_section, status_display],
)

next_btn.click(
    fn=next_section,
    inputs=[current_section],
    outputs=[current_section, status_display],
)

# Save button: write the rebuilt document to disk.
save_btn.click(
    fn=save_document,
    inputs=[updated_sections],
    outputs=status_display,
)

# Components refreshed whenever a section is (re)loaded; shared by the
# section-change handler and the initial page load below.
section_view_outputs = [
    section_text, section_html, checkbox_group, preview_html,
    progress, section_info, status_display, cached_footnotes,
]

# Any change of current_section reloads the section view.
current_section.change(
    fn=load_section,
    inputs=[current_section, cached_footnotes, cached_selections, applied_sections],
    outputs=section_view_outputs,
)

# On startup, show the first section with empty caches.
demo.load(
    fn=lambda: load_section(0, {}, {}, set()),
    inputs=None,
    outputs=section_view_outputs,
)

logging.info("Launching Gradio interface")
demo.launch(share=True)
logging.info("Gradio interface closed")
|
|
|
|
def main():
    """Parse the command line, create the LLM client and launch the GUI.

    Sets the module-level ``api_provider``, ``model_name`` and ``client``
    globals. The API key is read from ``OPENAI_API_KEY`` or ``GROQ_API_KEY``
    depending on ``--api``. The input document and the prompt template are
    read as UTF-8, matching the encoding used when the document is saved.

    On any setup failure (missing API key, unsupported provider, unreadable
    file) the problem is logged and printed and the function returns without
    launching the GUI.

    Returns:
        None.
    """
    global api_provider, model_name, client

    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", required=True, help="Input file path")
    parser.add_argument("--prompt", default="prompt.txt", help="Prompt template file path (default: prompt.txt)")
    parser.add_argument("--api", default="openai", choices=["openai", "groq"], help="API provider (default: openai)")
    # Help text states the defaults actually applied below.
    parser.add_argument("--model", help="Model name (defaults: gpt-4-turbo for OpenAI, llama3-70b-8192 for Groq)")
    args = parser.parse_args()

    api_provider = args.api.lower()

    # Per-provider default model, used only when --model is not given.
    default_models = {
        "openai": "gpt-4-turbo",
        "groq": "llama3-70b-8192",
    }
    model_name = args.model or default_models.get(api_provider, "gpt-4")

    logging.info(f"Starting application with file: {args.file}, prompt template: {args.prompt}")
    logging.info(f"Using API provider: {api_provider} with model: {model_name}")

    api_key_var = "OPENAI_API_KEY" if api_provider == "openai" else "GROQ_API_KEY"
    api_key = os.getenv(api_key_var)

    if not api_key:
        logging.error(f"Error: {api_key_var} environment variable is not set")
        print(f"❌ Error: {api_key_var} environment variable is not set")
        print(f"Please set it by running: export {api_key_var}=your_api_key")
        return

    if api_provider == "openai":
        client = OpenAI(api_key=api_key)
    elif api_provider == "groq":
        client = Groq(api_key=api_key)
    else:
        # argparse `choices` already rejects other values; kept as a guard
        # in case the choices list and this dispatch drift apart.
        logging.error(f"Unsupported API provider: {api_provider}")
        print(f"❌ Error: Unsupported API provider: {api_provider}")
        return

    try:
        # Explicit UTF-8: the platform default encoding may differ (e.g. on
        # Windows) and the document is written back as UTF-8.
        with open(args.file, encoding="utf-8") as f:
            text = f.read()
        logging.info(f"Successfully read input file: {args.file}")

        with open(args.prompt, encoding="utf-8") as f:
            prompt = f.read()
        logging.info(f"Successfully read prompt template: {args.prompt}")
    except FileNotFoundError as e:
        logging.error(f"File not found: {e}")
        print(f"Error: {e}")
        return
    except (OSError, UnicodeDecodeError) as e:
        logging.error(f"Error reading files: {e}")
        print(f"Error: {e}")
        return

    # Split the document into prologue + sections and hand over to the GUI.
    sections, headers, prologue = parse_qmd_sections(text)
    launch_gui(sections, headers, text, prompt, args.file, prologue)


if __name__ == "__main__":
    main()
|