diff --git a/rag_tutorials/qwen_local_rag/README.md b/rag_tutorials/qwen_local_rag/README.md new file mode 100644 index 0000000..c7d2c27 --- /dev/null +++ b/rag_tutorials/qwen_local_rag/README.md @@ -0,0 +1,113 @@ +# 🐋 Qwen 3 Local RAG Reasoning Agent + +This RAG application demonstrates how to build a powerful Retrieval-Augmented Generation (RAG) system using locally running Qwen 3 and Gemma 3 models via Ollama. It combines document processing, vector search, and web search capabilities to provide accurate, context-aware responses to user queries. + +## Features + +- **🧠 Multiple Local LLM Options**: + + - Qwen3 (1.7b, 8b) - Alibaba's latest language models + - Gemma3 (1b, 4b) - Google's efficient language models with multimodal capabilities + - DeepSeek-R1 (1.5b) - Alternative model option +- **📚 Comprehensive RAG System**: + + - Upload and process PDF documents + - Extract content from web URLs + - Intelligent chunking and embedding + - Similarity search with adjustable threshold +- **🌐 Web Search Integration**: + + - Fallback to web search when document knowledge is insufficient + - Configurable domain filtering + - Source attribution in responses +- **🔄 Flexible Operation Modes**: + + - Toggle between RAG and direct LLM interaction + - Force web search when needed + - Adjust similarity thresholds for document retrieval +- **💾 Vector Database Integration**: + + - Qdrant vector database for efficient similarity search + - Persistent storage of document embeddings + +## How to Get Started + +### Prerequisites + +- [Ollama](https://ollama.ai/) installed locally +- Python 3.10+ +- Qdrant account (free tier available) for vector storage +- Exa API key (optional, for web search capability) + +### Installation + +1. Clone the GitHub repository + +```bash +git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git +cd awesome-llm-apps/rag_tutorials/qwen_local_rag +``` + +2. Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +3.
Pull the required models using Ollama: + +```bash +ollama pull qwen3:1.7b # Or any other model you want to use +ollama pull snowflake-arctic-embed # Embedding model used by the app (1024-dimensional vectors) +``` + +4. Get your API keys: + + - Qdrant API key and URL (for vector database) + - Exa API key (optional, for web search; the web search agent also needs `ollama pull llama3.2`) +5. Run the application: + +```bash +streamlit run qwen_local_rag_agent.py +``` + +## How It Works + +1. **Document Processing**: + + - PDF files are processed using PyPDFLoader + - Web content is extracted using WebBaseLoader + - Documents are split into chunks with RecursiveCharacterTextSplitter +2. **Vector Database**: + + - Document chunks are embedded using Ollama's embedding models + - Embeddings are stored in the Qdrant vector database + - Similarity search retrieves relevant documents based on the query +3. **Query Processing**: + + - User queries are analyzed to determine the best information source + - System checks document relevance using the similarity threshold + - Falls back to web search if no relevant documents are found +4.
class OllamaEmbedderr(Embeddings):
    """Adapter exposing an ``OllamaEmbedder`` through the ``Embeddings`` interface.

    The wrapped embedder is stored on ``self.embedder`` and is always created
    with ``dimensions=1024``.
    """

    def __init__(self, model_name="snowflake-arctic-embed"):
        """
        Build the wrapped embedder for the requested model.

        Args:
            model_name (str): Identifier of the embedding model, passed to
                ``OllamaEmbedder`` as ``id``.
        """
        self.embedder = OllamaEmbedder(id=model_name, dimensions=1024)

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding of a single text via the wrapped embedder."""
        vector = self.embedder.get_embedding(text)
        return vector

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text in order, one ``embed_query`` call per text."""
        vectors = []
        for text in texts:
            vectors.append(self.embed_query(text))
        return vectors
+ +# Sidebar Configuration +st.sidebar.header("βš™οΈ Settings") + +# Model Selection +st.sidebar.header("🧠 Model Choice") +model_help = """ +- qwen3:1.7b: Lighter model (MoE) +- gemma3:1b: More capable but requires better GPU/RAM(32k context window) +- gemma3:4b: More capable and MultiModal (Vision)(128k context window) +- deepseek-r1:1.5b +- qwen3:8b: More capable but requires better GPU/RAM + +Choose based on your hardware capabilities. +""" +st.session_state.model_version = st.sidebar.radio( + "Select Model Version", + options=["qwen3:1.7b", "gemma3:1b", "gemma3:4b", "deepseek-r1:1.5b", "qwen3:8b"], + help=model_help +) + +st.sidebar.info("Run ollama pull qwen3:1.7b") + +# RAG Mode Toggle +st.sidebar.header("πŸ“š RAG Mode") +st.session_state.rag_enabled = st.sidebar.toggle("Enable RAG", value=st.session_state.rag_enabled) + +# Clear Chat Button +if st.sidebar.button("✨ Clear Chat"): + st.session_state.history = [] + st.rerun() + +# Show API Configuration only if RAG is enabled +if st.session_state.rag_enabled: + st.sidebar.header("πŸ—οΈ API Keys") + qdrant_api_key = st.sidebar.text_input("Qdrant API Key", type="password", value=st.session_state.qdrant_api_key) + qdrant_url = st.sidebar.text_input("Qdrant URL", + placeholder="https://your-cluster.cloud.qdrant.io:6333", + value=st.session_state.qdrant_url) + + # Update session state + st.session_state.qdrant_api_key = qdrant_api_key + st.session_state.qdrant_url = qdrant_url + + # Search Configuration (only shown in RAG mode) + st.sidebar.header("πŸ”¬ Search Tuning") + st.session_state.similarity_threshold = st.sidebar.slider( + "Similarity Threshold", + min_value=0.0, + max_value=1.0, + value=0.7, + help="Lower values will return more documents but might be less relevant. Higher values are more strict." 
+ ) + +# Add in the sidebar configuration section, after the existing API inputs + +st.sidebar.header("🌍 Web Search") +st.session_state.use_web_search = st.sidebar.checkbox("Enable Web Search Fallback", value=st.session_state.use_web_search) + +if st.session_state.use_web_search: + exa_api_key = st.sidebar.text_input( + "Exa AI API Key", + type="password", + value=st.session_state.exa_api_key, + help="Required for web search fallback when no relevant documents are found" + ) + st.session_state.exa_api_key = exa_api_key + + # Optional domain filtering + default_domains = ["arxiv.org", "wikipedia.org", "github.com", "medium.com"] + custom_domains = st.sidebar.text_input( + "Custom domains (comma-separated)", + value=",".join(default_domains), + help="Enter domains to search from, e.g.: arxiv.org,wikipedia.org" + ) + search_domains = [d.strip() for d in custom_domains.split(",") if d.strip()] + +# Utility Functions +def init_qdrant() -> QdrantClient | None: + """Initialize Qdrant client with configured settings. + + Returns: + QdrantClient: The initialized Qdrant client if successful. + None: If the initialization fails. 
def process_pdf(file) -> List:
    """Load an uploaded PDF, tag its pages with source metadata and chunk them.

    The upload is written to a temporary ``.pdf`` file because ``PyPDFLoader``
    is given a filesystem path. The temporary file is always removed before
    returning.

    Args:
        file: Uploaded file object; must provide ``getvalue()`` (the raw PDF
            bytes) and a ``name`` attribute (recorded as ``file_name``).

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` (chunk size
        1000, overlap 200), or an empty list if anything fails. Failures are
        reported through ``st.error`` instead of being raised.
    """
    tmp_path = None
    try:
        # delete=False keeps the path valid after the handle is closed; the
        # file is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_file.write(file.getvalue())
            tmp_path = tmp_file.name

        # Load only after the handle is closed: this guarantees every buffered
        # byte has reached disk and avoids reopening a still-open temp file
        # (which is not permitted on Windows).
        documents = PyPDFLoader(tmp_path).load()

        # Add source metadata
        for doc in documents:
            doc.metadata.update({
                "source_type": "pdf",
                "file_name": file.name,
                "timestamp": datetime.now().isoformat()
            })

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        return text_splitter.split_documents(documents)
    except Exception as e:
        st.error(f"πŸ“„ PDF processing error: {str(e)}")
        return []
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not turn a
                # successful (or already reported) run into a crash.
                pass
size=1024, + distance=Distance.COSINE + ) + ) + st.success(f"πŸ“š Created new collection: {COLLECTION_NAME}") + except Exception as e: + if "already exists" not in str(e).lower(): + raise e + + # Initialize vector store + vector_store = QdrantVectorStore( + client=client, + collection_name=COLLECTION_NAME, + embedding=OllamaEmbedderr() + ) + + # Add documents + with st.spinner('πŸ“€ Uploading documents to Qdrant...'): + vector_store.add_documents(texts) + st.success("βœ… Documents stored successfully!") + return vector_store + + except Exception as e: + st.error(f"πŸ”΄ Vector store error: {str(e)}") + return None + +def get_web_search_agent() -> Agent: + """Initialize a web search agent.""" + return Agent( + name="Web Search Agent", + model=Ollama(id="llama3.2"), + tools=[ExaTools( + api_key=st.session_state.exa_api_key, + include_domains=search_domains, + num_results=5 + )], + instructions="""You are a web search expert. Your task is to: + 1. Search the web for relevant information about the query + 2. Compile and summarize the most relevant information + 3. Include sources in your response + """, + show_tool_calls=True, + markdown=True, + ) + + +def get_rag_agent() -> Agent: + """Initialize the main RAG agent.""" + return Agent( + name="Qwen 3 RAG Agent", + model=Ollama(id=st.session_state.model_version), + instructions="""You are an Intelligent Agent specializing in providing accurate answers. + + When asked a question: + - Analyze the question and answer the question with what you know. + + When given context from documents: + - Focus on information from the provided documents + - Be precise and cite specific details + + When given web search results: + - Clearly indicate that the information comes from web search + - Synthesize the information clearly + + Always maintain high accuracy and clarity in your responses. 
def check_document_relevance(query: str, vector_store, threshold: float = 0.7) -> tuple[bool, List]:
    """Look up documents for ``query`` that pass a similarity score threshold.

    Args:
        query: Text handed to the retriever's ``invoke``.
        vector_store: Object exposing ``as_retriever``; a falsy value means no
            store is available.
        threshold: Value passed to the retriever as ``score_threshold``.

    Returns:
        ``(found, docs)`` where ``docs`` is whatever the retriever returned
        (at most ``k=5`` results are requested) and ``found`` is its
        truthiness. ``(False, [])`` when there is no vector store.
    """
    if vector_store:
        search_options = {"k": 5, "score_threshold": threshold}
        matches = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs=search_options,
        ).invoke(query)
        has_matches = True if matches else False
        return has_matches, matches

    return False, []
# Chat handler: runs once per Streamlit rerun when the user submitted a prompt.
if prompt:
    # Record and echo the user's message.
    st.session_state.history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    if st.session_state.rag_enabled:

        # Step 1: query evaluation. No rewriting is performed here, so the
        # prompt is used verbatim for retrieval and web search.
        with st.spinner("πŸ€”Evaluating the Query..."):
            try:
                rewritten_query = prompt

                with st.expander("Evaluating the query"):
                    st.write(f"User's Prompt: {prompt}")
            except Exception as e:
                st.error(f"❌ Error rewriting query: {str(e)}")
                rewritten_query = prompt

        # Step 2: Choose search strategy based on force_web_search toggle
        context = ""
        docs = []
        if not st.session_state.force_web_search and st.session_state.vector_store:
            # Try document search first
            retriever = st.session_state.vector_store.as_retriever(
                search_type="similarity_score_threshold",
                search_kwargs={
                    "k": 5,
                    "score_threshold": st.session_state.similarity_threshold
                }
            )
            docs = retriever.invoke(rewritten_query)
            if docs:
                context = "\n\n".join([d.page_content for d in docs])
                st.info(f"πŸ“Š Found {len(docs)} relevant documents (similarity > {st.session_state.similarity_threshold})")
            elif st.session_state.use_web_search:
                st.info("πŸ”„ No relevant documents found in database, falling back to web search...")

        # Step 3: Use web search if:
        # 1. Web search is forced ON via toggle, or
        # 2. No relevant documents found AND web search is enabled in settings
        # In both cases web search must be enabled and an Exa API key present.
        if (st.session_state.force_web_search or not context) and st.session_state.use_web_search and st.session_state.exa_api_key:
            with st.spinner("πŸ” Searching the web..."):
                try:
                    web_search_agent = get_web_search_agent()
                    web_results = web_search_agent.run(rewritten_query).content
                    if web_results:
                        context = f"Web Search Results:\n{web_results}"
                        if st.session_state.force_web_search:
                            st.info("ℹ️ Using web search as requested via toggle.")
                        else:
                            st.info("ℹ️ Using web search as fallback since no relevant documents were found.")
                except Exception as e:
                    st.error(f"❌ Web search error: {str(e)}")

        # Step 4: Generate response using the RAG agent
        with st.spinner("πŸ€– Thinking..."):
            try:
                rag_agent = get_rag_agent()

                if context:
                    full_prompt = f"""Context: {context}

Original Question: {prompt}
Please provide a comprehensive answer based on the available information."""
                else:
                    full_prompt = f"Original Question: {prompt}\n"
                    st.info("ℹ️ No relevant information found in documents or web search.")

                response = rag_agent.run(full_prompt)

                # Add assistant response to history
                st.session_state.history.append({
                    "role": "assistant",
                    "content": response.content
                })

                # Display assistant response
                with st.chat_message("assistant"):
                    st.write(response.content)

                # Show sources if available. ``docs`` is always bound (it is
                # initialised to [] in step 2), so truthiness is sufficient.
                if not st.session_state.force_web_search and docs:
                    with st.expander("πŸ” See document sources"):
                        for i, doc in enumerate(docs, 1):
                            source_type = doc.metadata.get("source_type", "unknown")
                            source_icon = "πŸ“„" if source_type == "pdf" else "🌐"
                            source_name = doc.metadata.get("file_name" if source_type == "pdf" else "url", "unknown")
                            st.write(f"{source_icon} Source {i} from {source_name}:")
                            st.write(f"{doc.page_content[:200]}...")

            except Exception as e:
                st.error(f"❌ Error generating response: {str(e)}")

    else:
        # Simple mode without RAG
        with st.spinner("πŸ€– Thinking..."):
            try:
                rag_agent = get_rag_agent()
                web_search_agent = get_web_search_agent() if st.session_state.use_web_search else None

                # Web search only runs here when it is both enabled in the
                # sidebar and forced via the toggle.
                context = ""
                if st.session_state.force_web_search and web_search_agent:
                    with st.spinner("πŸ” Searching the web..."):
                        try:
                            web_results = web_search_agent.run(prompt).content
                            if web_results:
                                context = f"Web Search Results:\n{web_results}"
                                st.info("ℹ️ Using web search as requested.")
                        except Exception as e:
                            st.error(f"❌ Web search error: {str(e)}")

                # Generate response
                if context:
                    full_prompt = f"""Context: {context}

Question: {prompt}

Please provide a comprehensive answer based on the available information."""
                else:
                    full_prompt = prompt

                response = rag_agent.run(full_prompt)
                response_content = response.content

                # Extract thinking process and final response. Reasoning
                # models such as Qwen 3 and DeepSeek-R1 wrap their reasoning
                # in <think>...</think>; the pattern must name those tags,
                # otherwise it only ever matches the empty string and nothing
                # is separated from the answer.
                import re
                think_pattern = r'<think>(.*?)</think>'
                think_match = re.search(think_pattern, response_content, re.DOTALL)

                if think_match:
                    thinking_process = think_match.group(1).strip()
                    final_response = re.sub(think_pattern, '', response_content, flags=re.DOTALL).strip()
                else:
                    thinking_process = None
                    final_response = response_content

                # Add assistant response to history (only the final response)
                st.session_state.history.append({
                    "role": "assistant",
                    "content": final_response
                })

                # Display assistant response; the reasoning (if any, and not
                # empty) goes into a collapsed expander above the answer.
                with st.chat_message("assistant"):
                    if thinking_process:
                        with st.expander("πŸ€” See thinking process"):
                            st.markdown(thinking_process)
                    st.markdown(final_response)

            except Exception as e:
                st.error(f"❌ Error generating response: {str(e)}")

else:
    st.warning("You can directly talk to qwen and gemma models locally! Toggle the RAG mode to upload documents!")