from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.vectorstores.utils import filter_complex_metadata
# Name of the local Ollama model, shared by generation and embeddings.
MODEL = "mistral"

# RAG prompt. The retrieved context is wrapped in <context> XML tags: the
# instructions tell the model the context arrives "inside XML tags", so the
# tags must actually be present (the original template omitted them).
prompt = ChatPromptTemplate.from_template(
    """
Use the following context as your learned knowledge, inside XML tags.
<context>
{context}
</context>
When answering the user:
- If you don't know, just say that you don't know.
- If you are not sure, ask for clarification.
Avoid mentioning that you obtained the information from the context.
And answer according to the language of the user's question.
Given the context information, answer the query.
Query: {input}
"""
)
# Build the RAG pipeline: local LLM, PDF ingestion, FAISS index, retrieval chain.
llm = Ollama(model=MODEL)

# Load the PDF and split it into overlapping chunks; the overlap preserves
# context that would otherwise be cut at chunk boundaries.
docs = PyPDFLoader("sample.pdf").load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)
# The vector store only accepts simple metadata types; drop anything complex.
chunks = filter_complex_metadata(chunks)

# Embed with the same local model used for generation — the bare
# OllamaEmbeddings() default would silently embed with a different model
# than MODEL, degrading retrieval consistency.
vector = FAISS.from_documents(chunks, OllamaEmbeddings(model=MODEL))
retriever = vector.as_retriever()

# Stuff retrieved documents into the prompt and parse the LLM output to a string.
output_parser = StrOutputParser()
document_chain = create_stuff_documents_chain(llm, prompt, output_parser=output_parser)
retrieval_chain = create_retrieval_chain(retriever, document_chain)
# Run the chain with the user's query and print only the final answer text.
user_query = """
Dame un resumen del documento
Translate the answer to spanish
Show only the translated answer, nothing more
"""
response = retrieval_chain.invoke({"input": user_query})
print(response["answer"])