from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import BSHTMLLoader
# from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
# Name of the model served by the local Ollama instance.
MODEL = "mistral"

# System/user template: the assistant may answer ONLY from the retrieved
# context; anything it knows from elsewhere must be declined explicitly.
_PROMPT_TEXT = """
You are a world class expert in vehicle appraisal.
Answer only based on the following provided context. If you know the answer but it's not based in the
provided context, don't provide the answer, just state the answer is not in the context provided:
<context>
{context}
</context>
User: {input}
"""

prompt = ChatPromptTemplate.from_template(_PROMPT_TEXT)
# Chat LLM backed by the locally running Ollama server.
llm = Ollama(model=MODEL)

# Load the saved listing page from disk and split it into ~1000-character
# chunks with no overlap before indexing.
raw_documents = BSHTMLLoader("../html/yapo_3.html").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(raw_documents)
# loader = UnstructuredURLLoader(urls=[
#     "https://www.yapo.cl/vehiculos/peugeot-3008-16-hybrid4-e-auto8-gt-2023_89068823"
# ])

# Embed with the same model as the LLM so the two stay consistent; the
# previous bare OllamaEmbeddings() silently fell back to the library's
# default embedding model, which differed from MODEL.
vector = Chroma.from_documents(texts, OllamaEmbeddings(model=MODEL))
retriever = vector.as_retriever()

# Pipeline: retrieve relevant chunks -> stuff them into the prompt's
# {context} slot -> parse the LLM reply to a plain string.
output_parser = StrOutputParser()
document_chain = create_stuff_documents_chain(llm, prompt, output_parser=output_parser)
retrieval_chain = create_retrieval_chain(retriever, document_chain)
# Question posed to the chain (in Spanish): extract the listing's key
# fields — year, make, model, price, mileage, fuel, transmission, main
# gallery photo URL, and the listing code.
_QUERY = """
Estás viendo la publicación de un vehículo, búsca la siguiente información:
año
marca
modelo
precio
kilometraje
combustible
transmisión
main gallery photo url
código publicación
"""

response = retrieval_chain.invoke({"input": _QUERY})
print(response["answer"])