BUG: Model always cites the first source when multiple citations are returned #4055

Closed
opened 2025-11-11 15:45:16 -06:00 by GiteaMirror · 0 comments
Owner

Originally created by @twaninsecure on GitHub (Feb 22, 2025).

Problem

When a tool emits many citations, the model for some reason can only see the first one as the "source_id".

Image

Verification that only the first source id can be accessed.

Image

Expected behavior

For each of the citations which are visible in the UI and emitted, a separate source_id should be relayed to the model so that it can actually point to those citations correctly.

Tool code

Note that you cannot run this yourself, because it relies on a local caching service to provide the Twitter data.

"""
title: Twitter Search Tool (via Nitter)
description: This tool returns a set of the recent twitter messages of a twitter username. For educational purposes only.
version: 1.0
license: MIT
"""

import json
import os
from datetime import datetime
from pydantic import BaseModel, Field

class EventEmitter:
    """Wrapper around an optional async event callback.

    Builds "status" and "citation" event dictionaries and forwards them to
    the callback. When no callback was supplied every emit is a no-op.
    """

    def __init__(self, emitter=None):
        """Store the callback.

        Args:
            emitter: Awaitable callable taking one event dict, or None to
                disable emitting.
        """
        self.emitter = emitter
        # NOTE(review): the two dicts below are initialised here but never
        # read or written by any method of this class.
        self.date_counters = {}  # Track counts for each date
        self.tweets_by_date = {}  # Group tweets by date for proper ordering

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        """Emit a status event, or forward a ready-made event dict.

        Args:
            description: Status text. If a dict is passed instead, it is sent
                to the callback unchanged and ``status``/``done`` are ignored.
            status: Value for the event's ``status`` field.
            done: Value for the event's ``done`` field.
        """
        if not self.emitter:
            return

        if isinstance(description, dict):
            # Caller already built the full event payload.
            await self.emitter(description)
            return

        await self.emitter({
            "type": "status",
            "data": {
                "status": status,
                "description": description,
                "done": done
            }
        })

    def get_citation(self, tweet, username):
        """Build the citation event for a tweet.

        Args:
            tweet: Dict describing the tweet. Reads the keys ``content``,
                ``id``, ``date`` and ``thread_number``.
            username: Account name, used in the label and the URL.

        Returns:
            A ``{"type": "citation", "data": {...}}`` dict, or None when the
            tweet has no content.
        """
        content = tweet.get('content')
        if not content:
            return None

        tweet_id = tweet.get('id')
        date = tweet.get('date')
        tweet_number = tweet.get('thread_number', None)

        try:
            tweet_date = datetime.strptime(date.replace(" UTC", ""), "%b %d, %Y · %I:%M %p")
            date_str = tweet_date.strftime("%d %b %Y %H:%M")
        except (AttributeError, TypeError, ValueError):
            # AttributeError covers a missing/None (or otherwise non-string)
            # date, which previously escaped this handler and aborted the
            # whole citation. An unparsable date falls back the same way.
            date_str = None

        source = f"@{username}"
        if date_str:
            source = f"{source} • {date_str}"
            # The thread number is only appended to a dated label.
            if tweet_number is not None:
                source = f"{source} #{tweet_number}"

        source_url = f"https://x.com/{username}"
        if tweet_id:
            source_url = f"{source_url}/status/{tweet_id}"

        citation = {
            "type": "citation",
            "data": {
                "document": [content],
                "metadata": [{
                    "source": source,
                }],
                "source": {
                    "name": source,
                    "url": source_url
                }
            }
        }

        return citation

    async def emit_citation(self, tweet, username):
        """Emit the citation event for a tweet.

        Does nothing when there is no callback or when ``get_citation``
        returns None (tweet without content).
        """
        if not self.emitter:
            return
        citation = self.get_citation(tweet, username)
        if not citation:
            return

        await self.emitter(citation)

def _format_tweet_markdown(tweet, username):
    """Convert a tweet object to markdown format.

    Args:
        tweet: Dict describing the tweet. Reads the keys ``content``,
            ``date``, ``stats`` (with ``replies``/``retweets``/``likes``)
            and ``thread_number``.
        username: Account name used in the section heading.

    Returns:
        A markdown fragment: a ``##`` heading, the content, an optional
        stats line, and a ``---`` separator.
    """
    # `or` instead of a .get() default: a key that is present but null would
    # bypass the default, then crash on stats.get(...) or render "None".
    content = tweet.get('content') or ''
    date = tweet.get('date')
    stats = tweet.get('stats') or {}
    tweet_number = tweet.get('thread_number')

    # Heading label built the same way as the citation label in get_citation.
    source = f"@{username}"
    if date:
        try:
            tweet_date = datetime.strptime(date.replace(" UTC", ""), "%b %d, %Y · %I:%M %p")
            date_str = tweet_date.strftime("%d %b %Y %H:%M")
        except (AttributeError, TypeError, ValueError):
            # Best effort: a non-string or unparsable date keeps the plain
            # "@username" heading.
            pass
        else:
            source = f"{source} • {date_str}"
            if tweet_number is not None:
                source = f"{source} #{tweet_number}"

    markdown = [f"## {source}\n", f"{content}\n"]

    # Only counters with a truthy value are shown.
    stats_line = []
    if stats.get('replies'):
        stats_line.append(f"💬 {stats['replies']}")
    if stats.get('retweets'):
        stats_line.append(f"🔄 {stats['retweets']}")
    if stats.get('likes'):
        stats_line.append(f"❤️ {stats['likes']}")
    if stats_line:
        markdown.append(' '.join(stats_line) + '\n')

    # Separator between consecutive tweets.
    markdown.append('---\n')

    return '\n'.join(markdown)

class Tools:
    """Tool collection exposing one lookup: recent tweets for a username.

    Tweets are fetched over HTTP from the API configured in ``Valves``,
    rendered as markdown for the return value, and each tweet is also emitted
    as a citation event through the supplied event emitter.
    """

    class Valves(BaseModel):
        """Configuration values for the API, editable in the UI."""
        API_KEY: str = Field(default="default-token", description="Twitter API key")
        API_URL: str = Field(default=os.getenv("TWITTER_API_URL", "http://localhost:8000"), description="Local Twitter API URL")
        # requests applies no timeout unless one is passed, so an unresponsive
        # API would otherwise stall the tool call indefinitely.
        REQUEST_TIMEOUT: float = Field(default=30.0, description="Seconds to wait for the Twitter API before giving up")

    class Config(BaseModel):
        """Fixed endpoint paths and user-facing error messages."""
        ENDPOINTS: dict = Field(default={
            "user_tweets": "/twitter/user"
        })
        ERROR_MESSAGES: dict = Field(default={
            "api_error": "Error accessing Twitter API",
            "invalid_username": "Invalid username provided"
        })

    def __init__(self):
        # NOTE(review): presumably tells the host application not to generate
        # its own citations because this tool emits them itself - confirm
        # against the Open WebUI tool documentation.
        self.citation = False
        self.config = self.Config()
        self.valves = self.Valves()

    async def get_user_tweets(self, username: str, __event_emitter__=None) -> str:
        """
        Get recent tweets from a Twitter user.

        Args:
            username (str): Twitter username (without @)
            __event_emitter__ (callable, optional): Function to emit status events

        Returns:
            str: Markdown formatted tweet content, or an error message when
            the username is empty, the request fails, or the API reports an
            error.
        """
        emitter = EventEmitter(__event_emitter__)

        await emitter.emit(f"Fetching tweets for user '{username}'...", status="in_progress", done=False)

        if not username:
            await emitter.emit(self.config.ERROR_MESSAGES["invalid_username"], done=True)
            return self.config.ERROR_MESSAGES["invalid_username"]

        # Imported outside the try block: the except clause below names
        # requests.RequestException, which would itself fail to resolve (and
        # mask the real ImportError) if the import were inside the try.
        import requests

        url = f"{self.valves.API_URL}{self.config.ENDPOINTS['user_tweets']}"
        headers = {"Authorization": f"Bearer {self.valves.API_KEY}"}
        params = {"username": username}

        # Only the network call and body decoding are guarded; they are the
        # only statements expected to raise these exceptions.
        try:
            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=self.valves.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests releases whose
            # json() raises a plain ValueError subclass.
            error_msg = f"{self.config.ERROR_MESSAGES['api_error']}: {str(e)}"
            await emitter.emit(error_msg, done=True)
            return error_msg

        # If there's an error in the response, return it
        if data.get("error"):
            await emitter.emit(data["error"], done=True)
            return data["error"]

        tweets = data.get("tweets", [])
        meta = data.get("meta", {})

        # Group tweets by their date string, keeping first-seen order.
        # Tweets without a date are skipped here and do not reach the output.
        tweets_by_date = {}
        for tweet in tweets:
            date = tweet.get('date')
            if date:
                tweets_by_date.setdefault(date, []).append(tweet)

        # Process each date group and add thread numbers
        processed_tweets = []
        for date_tweets in tweets_by_date.values():
            # Reverse tweets with same timestamp to get chronological order
            if len(date_tweets) > 1:
                date_tweets = date_tweets[::-1]
                # Writes thread_number into the tweet dicts themselves.
                for i, tweet in enumerate(date_tweets, 1):
                    tweet['thread_number'] = i
            processed_tweets.extend(date_tweets)

        # Create markdown output
        lines = [
            f"# {username} Tweets\n",
            f"*Fetched on {meta.get('fetched_at', datetime.now().isoformat())}*\n"
        ]

        # One markdown section and one citation event per tweet.
        for tweet in processed_tweets:
            lines.append(_format_tweet_markdown(tweet, username))
            await emitter.emit_citation(tweet, username)

        # NOTE(review): the status text embeds the repr of the whole `lines`
        # list and counts every fetched tweet, including undated ones skipped
        # above - confirm whether that is intended.
        await emitter.emit(description=f"Twitter retrieval for @{username} succesful ({len(tweets)} tweets): \n{lines}", status="complete",done=True)

        return '\n'.join(lines)

Open WebUI Version: 0.5.16. The bug is still present after updating to this version, and the earlier versions I used had the same bug.

Originally created by @twaninsecure on GitHub (Feb 22, 2025). ### Problem When a tool emits many citations, the model for some reason can only see the first one as the "source_id". ![Image](https://github.com/user-attachments/assets/15e87ec9-1827-4ab4-953c-9f02d3bc22bd) Verification that only the first source id can be accessed. ![Image](https://github.com/user-attachments/assets/2ba40b54-ad4e-41da-8584-040d03bcdbe5) ### Expected behavior For each of the citations which are visible in the UI and emitted, a separate source_id should be relayed to the model so that it can actually point to those citations correctly. ### Tool code Note that you cannot run this as this uses a local caching service to provide twitter data. ``` """ title: Twitter Search Tool (via Nitter) description: This tool returns a set of the recent twitter messages of a twitter username. For educational purposes only. version: 1.0 license: MIT """ import json import os from datetime import datetime from pydantic import BaseModel, Field class EventEmitter: def __init__(self, emitter=None): self.emitter = emitter self.date_counters = {} # Track counts for each date self.tweets_by_date = {} # Group tweets by date for proper ordering async def emit(self, description="Unknown State", status="in_progress", done=False): if self.emitter: if isinstance(description, dict): await self.emitter(description) else: message = { "type": "status", "data": { "status": status, "description": description, "done": done } } await self.emitter(message) def get_citation(self, tweet, username): """Get citation for a tweet.""" content = tweet.get('content') if not content: return None tweet_id = tweet.get('id') date = tweet.get('date') tweet_number = tweet.get('thread_number', None) try: tweet_date = datetime.strptime(date.replace(" UTC", ""), "%b %d, %Y · %I:%M %p") date_str = tweet_date.strftime("%d %b %Y %H:%M") source = f"@{username}" if date_str: source = f"{source} • {date_str}" if tweet_number is not None: source = 
f"{source} #{tweet_number}" except (ValueError, TypeError): date_str = None source = f"@{username}" source_url = f"https://x.com/{username}" if tweet_id: source_url = f"{source_url}/status/{tweet_id}" citation = { "type": "citation", "data": { "document": [content], "metadata": [{ "source": source, }], "source": { "name": source, "url": source_url } } } return citation async def emit_citation(self, tweet, username): """Emit citation for a tweet.""" if not self.emitter: return citation = self.get_citation(tweet, username) if not citation: return await self.emitter(citation) def _format_tweet_markdown(tweet, username): """Convert a tweet object to markdown format.""" tweet_id = tweet.get('id', '') content = tweet.get('content', '') date = tweet.get('date', '') stats = tweet.get('stats', {}) tweet_number = tweet.get('thread_number', None) markdown = [] # Format title matching emit_citation format source = f"@{username}" if date: try: tweet_date = datetime.strptime(date.replace(" UTC", ""), "%b %d, %Y · %I:%M %p") date_str = tweet_date.strftime("%d %b %Y %H:%M") source = f"{source} • {date_str}" if tweet_number is not None: source = f"{source} #{tweet_number}" except (ValueError, TypeError): pass # Add tweet header with the formatted source markdown.append(f"## {source}\n") # Add content markdown.append(f"{content}\n") # Add stats if available stats_line = [] if stats.get('replies'): stats_line.append(f"💬 {stats['replies']}") if stats.get('retweets'): stats_line.append(f"🔄 {stats['retweets']}") if stats.get('likes'): stats_line.append(f"❤️ {stats['likes']}") if stats_line: markdown.append(' '.join(stats_line) + '\n') # Add separator markdown.append('---\n') return '\n'.join(markdown) class Tools: class Valves(BaseModel): """Configuration values for the API, editable in the UI.""" API_KEY: str = Field(default="default-token", description="Twitter API key") API_URL: str = Field(default=os.getenv("TWITTER_API_URL", "http://localhost:8000"), description="Local Twitter API 
URL") class Config(BaseModel): ENDPOINTS: dict = Field(default={ "user_tweets": "/twitter/user" }) ERROR_MESSAGES: dict = Field(default={ "api_error": "Error accessing Twitter API", "invalid_username": "Invalid username provided" }) def __init__(self): self.citation = False self.config = self.Config() self.valves = self.Valves() async def get_user_tweets(self, username: str, __event_emitter__=None) -> str: """ Get recent tweets from a Twitter user. Args: username (str): Twitter username (without @) __event_emitter__ (callable, optional): Function to emit status events Returns: str: Markdown formatted tweet content """ emitter = EventEmitter(__event_emitter__) await emitter.emit(f"Fetching tweets for user '{username}'...", status="in_progress", done=False) if not username: await emitter.emit(self.config.ERROR_MESSAGES["invalid_username"], done=True) return self.config.ERROR_MESSAGES["invalid_username"] try: import requests url = f"{self.valves.API_URL}{self.config.ENDPOINTS['user_tweets']}" headers = {"Authorization": f"Bearer {self.valves.API_KEY}"} params = {"username": username} response = requests.get(url, headers=headers, params=params) response.raise_for_status() data = response.json() # If there's an error in the response, return it if data.get("error"): await emitter.emit(data["error"], done=True) return data["error"] tweets = data.get("tweets", []) meta = data.get("meta", {}) # Group tweets by date and add thread numbers tweets_by_date = {} for tweet in tweets: date = tweet.get('date') if date: if date not in tweets_by_date: tweets_by_date[date] = [] tweets_by_date[date].append(tweet) # Process each date group and add thread numbers processed_tweets = [] for date, date_tweets in tweets_by_date.items(): # Reverse tweets with same timestamp to get chronological order if len(date_tweets) > 1: date_tweets = date_tweets[::-1] for i, tweet in enumerate(date_tweets, 1): tweet['thread_number'] = i processed_tweets.extend(date_tweets) # Create markdown output lines 
= [ f"# {username} Tweets\n", f"*Fetched on {meta.get('fetched_at', datetime.now().isoformat())}*\n" ] # Process each tweet for tweet in processed_tweets: lines.append(_format_tweet_markdown(tweet, username)) await emitter.emit_citation(tweet, username) await emitter.emit(description=f"Twitter retrieval for @{username} succesful ({len(tweets)} tweets): \n{lines}", status="complete",done=True) return '\n'.join(lines) except requests.RequestException as e: error_msg = f"{self.config.ERROR_MESSAGES['api_error']}: {str(e)}" await emitter.emit(error_msg, done=True) return error_msg ``` Open WebUI Version: I've updated to 0.5.16 and it's still there. My previous versions also had the same bug.
Sign in to join this conversation.
1 Participants
Notifications
Due Date
No due date set.
Dependencies

No dependencies set.

Reference: github-starred/open-webui#4055