from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import BSHTMLLoader
# from langchain_community.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

MODEL = "mistral"

# Prompt for the RAG chain: {context} is filled with the retrieved document chunks
# and {input} with the user question.
prompt = ChatPromptTemplate.from_template(
    """
    You are a world-class expert in vehicle appraisal.
    Answer only based on the following provided context. If you know the answer
    but it is not based on the provided context, do not provide it; just state
    that the answer is not in the context provided:

    {context}

    User: {input}
    """
)

llm = Ollama(model=MODEL)

# Load the saved listing page and split it into chunks for embedding.
raw_documents = BSHTMLLoader("../html/yapo_3.html").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(raw_documents)

# Alternative: load the listing directly from the URL instead of a local HTML file.
# loader = UnstructuredURLLoader(urls=[
#     "https://www.yapo.cl/vehiculos/peugeot-3008-16-hybrid4-e-auto8-gt-2023_89068823"
# ])

# Embed the chunks and index them in Chroma. Note that OllamaEmbeddings() uses its
# default Ollama embedding model; pass model=MODEL to reuse the same local model.
vector = Chroma.from_documents(texts, OllamaEmbeddings())
retriever = vector.as_retriever()

output_parser = StrOutputParser()
# "Stuff" the retrieved documents into the prompt, then wrap it in a retrieval chain.
document_chain = create_stuff_documents_chain(llm, prompt, output_parser=output_parser)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

response = retrieval_chain.invoke({
    "input": """
    You are looking at a vehicle listing; find the following information:
    year
    make
    model
    price
    mileage
    fuel type
    transmission
    main gallery photo url
    listing code
    """
})

print(response["answer"])
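
# Optional sketch (not part of the original script): persist the Chroma index to disk
# so the HTML does not have to be re-embedded on every run. The persist_directory
# path "./chroma_yapo" is an assumption chosen for illustration.
#
# persisted_vector = Chroma.from_documents(
#     texts, OllamaEmbeddings(), persist_directory="./chroma_yapo"
# )
# # On later runs the index can be reloaded without re-embedding:
# # vector = Chroma(
# #     persist_directory="./chroma_yapo", embedding_function=OllamaEmbeddings()
# # )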