# main.py
  1. from langchain_community.llms import Ollama
  2. from langchain_community.embeddings import OllamaEmbeddings
  3. from langchain.prompts import ChatPromptTemplate
  4. from langchain_core.output_parsers import StrOutputParser
  5. from langchain_community.document_loaders import PyPDFLoader
  6. from langchain.text_splitter import RecursiveCharacterTextSplitter
  7. from langchain.chains.combine_documents import create_stuff_documents_chain
  8. from langchain_community.vectorstores import FAISS
  9. from langchain.chains import create_retrieval_chain
  10. from langchain.vectorstores.utils import filter_complex_metadata
  11. MODEL = "mistral"
  12. prompt = ChatPromptTemplate.from_template(
  13. """
  14. Use the following context as your learned knowledge, inside <context></context> XML tags.
  15. <context>
  16. {context}
  17. </context>
  18. When answer to user:
  19. - If you don't know, just say that you don't know.
  20. - If you don't know when you are not sure, ask for clarification.
  21. Avoid mentioning that you obtained the information from the context.
  22. And answer according to the language of the user's question.
  23. Given the context information, answer the query.
  24. Query: {input}
  25. """
  26. )
  27. llm = Ollama(model=MODEL)
  28. docs = PyPDFLoader("sample.pdf").load()
  29. text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
  30. chunks = text_splitter.split_documents(docs)
  31. chunks = filter_complex_metadata(chunks)
  32. vector = FAISS.from_documents(chunks, OllamaEmbeddings())
  33. retriever = vector.as_retriever()
  34. output_parser = StrOutputParser()
  35. document_chain = create_stuff_documents_chain(llm, prompt, output_parser=output_parser)
  36. retrieval_chain = create_retrieval_chain(retriever, document_chain)
  37. response = retrieval_chain.invoke({
  38. "input": """
  39. Dame un resumen del documento
  40. Translate the answer to spanish
  41. Show only the translated answer, nothing more
  42. """
  43. })
  44. print(response["answer"])