```python
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter


class IdentV2:
    def __init__(self):
        # Load the CSV of available models, keeping "modelo_id" as metadata on each row.
        self.loader = CSVLoader(
            file_path="../csv/bm_res_lite_peugeot.csv",
            metadata_columns=["modelo_id"],
        )
        self.documents = self.loader.load()
        print(f"Loaded {len(self.documents)} records")

        # Embed each row with a local Ollama embedding model and index it in Chroma.
        self.embeddings = OllamaEmbeddings(model="nomic-embed-text")
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=100, chunk_overlap=0
        ).split_documents(self.documents)
        self.vectorstore = Chroma.from_documents(
            chunks, self.embeddings, persist_directory="db/chroma_db"
        )

        # Local chat model that reasons over the retrieved candidates.
        self.llm = ChatOllama(model="mistral", temperature=0)

    def search(self, query, k=7):
        # Retrieve the k rows most similar to the free-text description.
        docs = self.vectorstore.similarity_search(query, k=k)
        print(f"Search results: {docs}")
        return [
            (doc.page_content, "modelo_id: " + doc.metadata["modelo_id"])
            for doc in docs
        ]

    def get_chain(self):
        template = """
        You are a world expert in vehicle appraisal.
        Below I share a partial description of a vehicle model:
        {query}

        Your job is to find the vehicle model that best fits the description from the following list of available models:
        {models}

        Consider the following comparison criteria, in descending order of importance:
        - Year
        - Brand
        - Model
        - Transmission
        - Variant
        - Displacement
        - Fuel
        - Traction
        - Number of doors
        - Price (10%)

        Explain your comparison criteria and return the models that fit best, ordered from best to worst match, including the 'modelo_id'.
        """
        prompt = PromptTemplate(input_variables=["models", "query"], template=template)
        chain = LLMChain(llm=self.llm, prompt=prompt)
        return chain

    def ident(self, query):
        # Retrieve candidate models, then let the LLM pick and rank the best matches.
        results = self.search(query)
        chain = self.get_chain()
        response = chain.invoke(dict(query=query, models=results))
        return response["text"]


print(IdentV2().ident("PEUGEOT 208 2020 1.2 SIGNATURE 1.2 PURETECH 82 MT KILOMETRAJE 65035 KM TRANSMISION MT PUERTAS 5 ANO 2020 TRACCION 4X2 MOTOR 1200 LT TIPO HATCHBACK COMBUSTIBLE BENCINA COLOR GRIS PLATINUM"))
```
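Note that the constructor calls `Chroma.from_documents`, which reloads and re-embeds the whole CSV on every run. A minimal sketch of reopening the already persisted index instead, assuming `db/chroma_db` was built with the same `nomic-embed-text` embedding model as above:

```python
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Reopen the persisted Chroma collection without re-loading or re-embedding the CSV.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma(persist_directory="db/chroma_db", embedding_function=embeddings)

# The reopened store answers similarity queries just like the one built inside IdentV2.
docs = vectorstore.similarity_search("PEUGEOT 208 2020 1.2 PURETECH", k=3)
for doc in docs:
    print(doc.metadata["modelo_id"], doc.page_content[:80])
```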