[GH-ISSUE #882] An updated langchain sample #26187

Closed
opened 2026-04-22 02:14:59 -05:00 by GiteaMirror · 2 comments

Originally created by @suoko on GitHub (Oct 23, 2023).
Original GitHub issue: https://github.com/ollama/ollama/issues/882

Originally assigned to: @technovangelist on GitHub.

Below I wrote an extended sample for LangChain that imports all PDF, TXT, DOCX, and HTML files from the /home/user/docs folder (it accepts only full paths).
You will need to `pip install langchain pypdf chromadb tiktoken docx2txt unstructured`.

```
import os

from langchain.document_loaders import (
    Docx2txtLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredHTMLLoader,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import Ollama

# Connect to a local Ollama server; swap the model name if you prefer another.
ollama = Ollama(base_url='http://localhost:11434', model="llama2")
# (alternative: model="starcoder")

# Quick sanity check that the model responds:
# print(ollama("why is the sky blue"))

# Load every supported document from the folder.
docs_dir = "/home/user/docs"
documents = []
for file in os.listdir(docs_dir):
    path = os.path.join(docs_dir, file)
    if file.endswith(".pdf"):
        documents.extend(PyPDFLoader(path).load())
    elif file.endswith(".docx"):
        # docx2txt only reads .docx; legacy .doc files are not supported.
        documents.extend(Docx2txtLoader(path).load())
    elif file.endswith(".txt"):
        documents.extend(TextLoader(path).load())
    elif file.endswith(".html") or file.endswith(".htm"):
        documents.extend(UnstructuredHTMLLoader(path).load())

# Split the documents into overlapping chunks for embedding.
text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=20)
all_splits = text_splitter.split_documents(documents)

# Embed the chunks and index them in Chroma.
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())

question = "in case of problems with the inverter 3PH 10KTL-15KTL-V2, which steps do I have to follow?"
docs = vectorstore.similarity_search(question)
print(len(docs))

# Answer the question with retrieval-augmented generation.
from langchain.chains import RetrievalQA
qachain = RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
print(qachain({"query": question}))
```
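One optional tweak if you run this repeatedly: pass a `persist_directory` to Chroma so the embeddings are computed once and reloaded on later runs. A minimal sketch, assuming the script above and an arbitrary `db` folder name:

```
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma

# First run: embed the chunks and write the index to disk.
vectorstore = Chroma.from_documents(documents=all_splits,
                                    embedding=GPT4AllEmbeddings(),
                                    persist_directory="db")

# Later runs: reload the persisted index instead of re-embedding everything.
vectorstore = Chroma(persist_directory="db",
                     embedding_function=GPT4AllEmbeddings())
```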

@jmorganca commented on GitHub (Oct 24, 2023):

Hi @suoko. Thanks for creating an issue! Would it be possible to create a PR that updates an existing example (preferably, if one already exists) or creates a new one in `examples/`? I'll close this issue for now, but do feel free to create a PR and we can collaborate on it there.

Thanks again!


@mmenacer commented on GitHub (Nov 29, 2023):

Hi,

I'm doing the same here, but instead of a local model I'm using Ollama in Docker on a hosted instance:

```
from langchain.llms import Ollama

ollama = Ollama(base_url='https://myollamalinkinhosted',
                model="llama2:70b")
```

I get this error on `print(qachain({"query": question}))`:

File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 312, in __call__
    raise e
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 306, in __call__
    self._call(inputs, run_manager=run_manager)
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/combine_documents/base.py", line 123, in _call
    output, extra_return_dict = self.combine_docs(
                                ^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/combine_documents/stuff.py", line 172, in combine_docs
    return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 293, in predict
    return self(kwargs, callbacks=callbacks)[self.output_key]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 312, in __call__
    raise e
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/base.py", line 306, in __call__
    self._call(inputs, run_manager=run_manager)
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 103, in _call
    response = self.generate([inputs], run_manager=run_manager)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/chains/llm.py", line 115, in generate
    return self.llm.generate_prompt(
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 506, in generate_prompt
    return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 656, in generate
    output = self._generate_helper(
             ^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 543, in _generate_helper
    raise e
  File "/opt/homebrew/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 530, in _generate_helper
    self._generate(
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/llms/ollama.py", line 241, in _generate
    final_chunk = super()._stream_with_aggregation(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/llms/ollama.py", line 177, in _stream_with_aggregation
    for stream_resp in self._create_stream(prompt, stop, **kwargs):
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/langchain/llms/ollama.py", line 161, in _create_stream
    optional_detail = response.json().get("error")
                      ^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/requests/models.py", line 975, in json
    raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
```
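That `JSONDecodeError` means the response body wasn't JSON at all, so the hosted endpoint (or a proxy in front of it) is likely returning something like an HTML error page instead of the Ollama API. A quick check, sketched here against the placeholder URL from the snippet above, is to hit the API directly and confirm JSON comes back:

```
import requests

# The Ollama API answers GET /api/tags with JSON; anything else (HTML,
# a redirect to a login page, etc.) would reproduce the error above.
resp = requests.get("https://myollamalinkinhosted/api/tags", timeout=10)
print(resp.status_code, resp.headers.get("content-type"))
print(resp.text[:200])  # peek at the raw body before trying resp.json()
```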