mirror of
https://github.com/Shubhamsaboo/awesome-llm-apps.git
synced 2026-03-09 07:25:00 -05:00
Modified files: - llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py - advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py - advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py - rag_tutorials/rag_chain/app.py - rag_tutorials/hybrid_search_rag/main.py
200 lines
7.2 KiB
Python
200 lines
7.2 KiB
Python
import os
|
|
import streamlit as st
|
|
|
|
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
|
from langchain_chroma import Chroma
|
|
from langchain_community.document_loaders import PyPDFLoader
|
|
from langchain_text_splitters.sentence_transformers import SentenceTransformersTokenTextSplitter
|
|
from langchain_core.prompts import ChatPromptTemplate
|
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
from langchain_core.output_parsers import StrOutputParser
|
|
from langchain_core.runnables import RunnablePassthrough
|
|
|
|
|
|
# Embedding model used for both indexing documents and embedding queries.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Persistent Chroma vector store holding the pharmaceutical document chunks.
db = Chroma(
    collection_name="pharma_database",
    embedding_function=embedding_model,
    persist_directory='./pharma_db'
)
|
|
|
|
def format_docs(docs):
    """Join the page contents of *docs* into one blank-line-separated string.

    Args:
        docs (list): Document objects, each exposing a ``page_content``
            attribute.

    Returns:
        str: Every document's page content, concatenated and separated by
        double newlines.
    """
    pieces = [doc.page_content for doc in docs]
    return "\n\n".join(pieces)
|
|
|
|
def add_to_db(uploaded_files):
    """Process uploaded PDF files and add their chunked contents to the database.

    Each uploaded file is written to a temporary path under ``./temp``,
    loaded with ``PyPDFLoader`` (one document per page), split into
    overlapping token-based chunks, and added — with per-page metadata —
    to the module-level Chroma store ``db``. Temporary files are removed
    even when loading or splitting fails.

    Args:
        uploaded_files (list): Uploaded file objects (e.g. Streamlit
            ``UploadedFile``); each must provide ``.name`` and
            ``.getbuffer()``.

    Returns:
        None
    """
    # Nothing to do without uploads; surface the problem in the UI.
    if not uploaded_files:
        st.error("No files uploaded!")
        return

    # Build the splitter once: it is identical for every file, and
    # constructing it loads a sentence-transformers model (expensive).
    st_text_splitter = SentenceTransformersTokenTextSplitter(
        model_name="sentence-transformers/all-mpnet-base-v2",
        chunk_size=100,
        chunk_overlap=50
    )

    for uploaded_file in uploaded_files:
        # Save the upload to a temporary path so PyPDFLoader can read it.
        temp_file_path = os.path.join("./temp", uploaded_file.name)
        os.makedirs(os.path.dirname(temp_file_path), exist_ok=True)

        with open(temp_file_path, "wb") as temp_file:
            temp_file.write(uploaded_file.getbuffer())

        try:
            # Load the PDF; each page becomes one document object.
            loader = PyPDFLoader(temp_file_path)
            data = loader.load()

            # Collect per-page metadata and text content.
            doc_metadata = [doc.metadata for doc in data]
            doc_content = [doc.page_content for doc in data]

            # Split pages into smaller, overlapping token chunks.
            st_chunks = st_text_splitter.create_documents(doc_content, doc_metadata)

            # Add chunks to the vector database.
            db.add_documents(st_chunks)
        finally:
            # Always clean up, even if loading/splitting raised, so
            # ./temp does not accumulate stale uploads.
            os.remove(temp_file_path)
|
|
|
|
def run_rag_chain(query):
    """Answer *query* with a Retrieval-Augmented Generation (RAG) chain.

    Retrieves the five most similar chunks from the Chroma store, slots
    them into a pharma-specialist prompt template, and generates an answer
    with Gemini 1.5 Pro using the API key held in Streamlit session state.

    Args:
        query (str): The user's question to be answered.

    Returns:
        str: The chat model's answer, grounded in the retrieved context.
    """
    # Retriever over the shared Chroma store: top-5 by embedding similarity.
    doc_retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5})

    # Prompt template: retrieved context followed by the user's question.
    PROMPT_TEMPLATE = """
    You are a highly knowledgeable assistant specializing in pharmaceutical sciences.
    Answer the question based only on the following context:
    {context}

    Answer the question based on the above context:
    {question}

    Use the provided context to answer the user's question accurately and concisely.
    Don't justify your answers.
    Don't give information not mentioned in the CONTEXT INFORMATION.
    Do not say "according to the context" or "mentioned in the context" or similar.
    """

    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

    # Generator: Gemini chat model, keyed from Streamlit session state.
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro",
        api_key=st.session_state.get("gemini_api_key"),
        temperature=1
    )

    # Parse the chat model's message down to a plain string.
    to_text = StrOutputParser()

    # Assemble the chain: retrieve -> format -> prompt -> LLM -> string.
    chain_inputs = {"context": doc_retriever | format_docs, "question": RunnablePassthrough()}
    rag_chain = chain_inputs | prompt_template | llm | to_text

    # Run the chain end-to-end on the user's query.
    return rag_chain.invoke(query)
|
|
|
|
def main():
    """Render the PharmaQuery Streamlit UI and wire up its interactions.

    Layout:
      - Main area: a query text box plus a Submit button; answers are
        produced by ``run_rag_chain`` and written below the button.
      - Sidebar: a Gemini API key input (persisted into
        ``st.session_state``), a PDF uploader that feeds ``add_to_db``,
        and a footer credit.

    Returns:
        None
    """
    st.set_page_config(page_title="PharmaQuery", page_icon=":microscope:")
    st.header("Pharmaceutical Insight Retrieval System")

    # Main query input.
    user_query = st.text_area(
        ":bulb: Enter your query about the Pharmaceutical Industry:",
        placeholder="e.g., What are the AI applications in drug discovery?"
    )

    if st.button("Submit"):
        if user_query:
            # Run the RAG chain and display the answer.
            with st.spinner("Thinking..."):
                answer = run_rag_chain(query=user_query)
                st.write(answer)
        else:
            st.warning("Please ask a question")

    # Sidebar: API key management.
    with st.sidebar:
        st.title("API Keys")
        api_key = st.text_input("Enter your Gemini API key:", type="password")

        if st.button("Enter"):
            if api_key:
                # Persist across reruns; read later by run_rag_chain.
                st.session_state.gemini_api_key = api_key
                st.success("API key saved!")
            else:
                st.warning("Please enter your Gemini API key to proceed.")

    # Sidebar: optional document ingestion.
    with st.sidebar:
        st.markdown("---")
        uploaded_pdfs = st.file_uploader(
            "Upload your research documents related to Pharmaceutical Sciences (Optional) :memo:",
            type=["pdf"],
            accept_multiple_files=True
        )

        if st.button("Submit & Process"):
            if uploaded_pdfs:
                with st.spinner("Processing your documents..."):
                    add_to_db(uploaded_pdfs)
                    st.success(":file_folder: Documents successfully added to the database!")
            else:
                st.warning("Please upload the file")

    # Sidebar footer credit.
    st.sidebar.write("Built with ❤️ by [Charan](https://www.linkedin.com/in/codewithcharan/)")
|
|
|
|
# Launch the Streamlit app only when run as a script (not on import).
if __name__ == "__main__":
    main()