"""RAG over a local PDF with Ollama + FAISS.

Loads ``sample.pdf``, splits it into chunks, embeds the chunks with Ollama
embeddings, and answers a query through a LangChain retrieval chain backed
by a FAISS vector store.
"""

from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
# Consistent with the other vectorstore imports; the old
# `langchain.vectorstores.utils` path is a deprecated shim.
from langchain_community.vectorstores.utils import filter_complex_metadata

MODEL = "mistral"

# NOTE(fix): the original prompt claimed the context was "inside XML tags"
# but never wrapped {context} in any tags — the tags are now actually there.
# The garbled instruction "If you don't know when you are not sure" is also
# repaired into a single coherent rule.
prompt = ChatPromptTemplate.from_template(
    """
    Use the following context as your learned knowledge, inside XML tags.

    <context>
    {context}
    </context>

    When answering the user:
    - If you don't know, just say that you don't know.
    - If you are not sure, ask for clarification.

    Avoid mentioning that you obtained the information from the context.
    And answer according to the language of the user's question.

    Given the context information, answer the query.

    Query: {input}
    """
)


def main() -> None:
    """Build the retrieval pipeline and print the answer to a sample query."""
    llm = Ollama(model=MODEL)

    # Load and chunk the PDF; the 100-char overlap keeps sentence context
    # from being cut at chunk boundaries.
    docs = PyPDFLoader("sample.pdf").load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
    chunks = text_splitter.split_documents(docs)
    # Vector stores only accept scalar metadata values; drop complex ones.
    chunks = filter_complex_metadata(chunks)

    # NOTE(fix): pass model=MODEL so embeddings use the same locally-pulled
    # Ollama model as generation, instead of the library's default model.
    vector = FAISS.from_documents(chunks, OllamaEmbeddings(model=MODEL))
    retriever = vector.as_retriever()

    document_chain = create_stuff_documents_chain(
        llm, prompt, output_parser=StrOutputParser()
    )
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    response = retrieval_chain.invoke({
        "input": """
Dame un resumen del documento
Translate the answer to spanish
Show only the translated answer, nothing more
"""
    })
    print(response["answer"])


if __name__ == "__main__":
    main()