"""RAG question answering over a local PDF with Ollama + FAISS.

Loads a PDF, splits it into overlapping chunks, embeds the chunks into an
in-memory FAISS vector store, and answers a query through a
retrieval-augmented chain backed by a local Ollama model.

Requires a running Ollama server with the configured model pulled, and the
PDF present at ``PDF_PATH``.
"""

from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.vectorstores.utils import filter_complex_metadata

# Pipeline settings hoisted to named constants so they are easy to tune.
MODEL = "mistral"
PDF_PATH = "sample.pdf"
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 100

# Prompt that injects the retrieved chunks inside <context> tags and tells the
# model how to answer. (The instruction grammar was garbled in the original —
# fixed so the model receives unambiguous directions.)
prompt = ChatPromptTemplate.from_template(
    """
Use the following context as your learned knowledge, inside <context></context> XML tags.
<context>
{context}
</context>

When answering the user:
- If you don't know, just say that you don't know.
- If you are not sure, ask for clarification.
Avoid mentioning that you obtained the information from the context.
And answer according to the language of the user's question.

Given the context information, answer the query.
Query: {input}
"""
)


def build_retrieval_chain():
    """Build and return the retrieval-augmented QA chain for ``PDF_PATH``.

    Returns:
        A langchain retrieval chain whose ``invoke({"input": ...})`` result
        carries the model's reply under the ``"answer"`` key.
    """
    llm = Ollama(model=MODEL)

    # Load the PDF and split it into overlapping chunks for embedding.
    docs = PyPDFLoader(PDF_PATH).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    # Drop metadata values FAISS cannot store (lists, dicts, ...).
    chunks = filter_complex_metadata(splitter.split_documents(docs))

    # Use the same Ollama model for embeddings as for generation so one
    # pulled model serves the whole pipeline; the parameterless default may
    # not be installed locally.
    vector_store = FAISS.from_documents(chunks, OllamaEmbeddings(model=MODEL))
    retriever = vector_store.as_retriever()

    document_chain = create_stuff_documents_chain(
        llm, prompt, output_parser=StrOutputParser()
    )
    return create_retrieval_chain(retriever, document_chain)


def main():
    """Run one sample query against the document and print the answer."""
    retrieval_chain = build_retrieval_chain()
    response = retrieval_chain.invoke({
        "input": """
Dame un resumen del documento
Translate the answer to spanish
Show only the translated answer, nothing more
"""
    })
    print(response["answer"])


if __name__ == "__main__":
    main()