"""Identify the catalogue vehicle model that best matches a free-text description.

Rows of a CSV catalogue are embedded into a Chroma vector store; the entries
most similar to a query are then passed to an Ollama chat model, which is asked
to rank them against the description.
"""
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter


class IdentV2:
    """Retrieval-plus-LLM matcher for vehicle descriptions.

    Constructing an instance loads the CSV, splits and embeds it, builds the
    vector store and creates the chat model, so construction performs file and
    service I/O.
    """

    # Class-level placeholders; each one is overwritten per instance in __init__.
    loader = None
    documents = None
    embeddings = None
    vectorstore = None
    llm = None

    def __init__(self):
        """Load the catalogue, index it in Chroma and create the chat model."""
        # Both paths below are relative, so they resolve against the current
        # working directory of the process.
        self.loader = CSVLoader(
            file_path="../csv/bm_res_lite_peugeot.csv",
            metadata_columns=["modelo_id"],
        )
        self.documents = self.loader.load()
        print(f"Loaded {len(self.documents)} registers")

        self.embeddings = OllamaEmbeddings(model="nomic-embed-text")
        splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
        chunks = splitter.split_documents(self.documents)

        # NOTE(review): from_documents runs on every construction against the
        # same persist_directory; presumably this re-adds the chunks to any
        # collection already persisted there, so repeated runs may accumulate
        # duplicates -- confirm, and consider reusing the stored collection or
        # passing stable ids.
        self.vectorstore = Chroma.from_documents(
            chunks,
            self.embeddings,
            persist_directory="db/chroma_db",
        )

        # ChatOllama selects its model through the ``model`` field. The former
        # ``model_name=`` keyword is not that field, so "mistral" was not being
        # applied to the chat model.
        self.llm = ChatOllama(model="mistral", temperature=0)

    def search(self, query: str, k: int = 7) -> list:
        """Return the ``k`` stored chunks most similar to ``query``.

        Args:
            query: Free-text vehicle description.
            k: Number of results requested from the vector store.

        Returns:
            A list of ``(page_content, "modelo_id: <id>")`` tuples, one per hit.

        Raises:
            KeyError: If a hit has no ``modelo_id`` entry in its metadata.
        """
        docs = self.vectorstore.similarity_search(query, k=k)
        print(f"Search results: {docs}")
        # f-string instead of ``+`` so a non-string metadata value cannot raise
        # TypeError; the output is identical when the value is already a str.
        return [
            (doc.page_content, f'modelo_id: {doc.metadata["modelo_id"]}')
            for doc in docs
        ]

    def get_chain(self):
        """Build the LLMChain that asks the model to rank candidate vehicles.

        Returns:
            An ``LLMChain`` whose prompt expects the ``query`` and ``models``
            input variables.
        """
        # NOTE(review): the prompt asks for 'model_id' while search() labels
        # the value 'modelo_id', and "including de" looks like a typo for
        # "including the". The wording is left untouched here so model output
        # is not perturbed -- confirm before editing.
        template = """
        You are a world expert in vehicle appraisal
        Below I am going to share with you a partial description of a vehicle model:
        {query}
        Your job is to try to find the vehicle model that best fits the description from the following list of available models:
        {models}
        Consider the following comparison criteria in descending order:
        - Year
        - Brand
        - Model
        - Transmission
        - Variant
        - Displacement
        - Fuel
        - Traction
        - Number of doors
        - Price (10%)
        Explain your comparison criterion and give me a list of the models ordered in descending order that have best fitted, including de 'model_id'.
        """
        prompt = PromptTemplate(
            input_variables=["models", "query"],
            template=template,
        )
        return LLMChain(llm=self.llm, prompt=prompt)

    def ident(self, query: str) -> str:
        """Rank catalogue models against ``query`` and return the LLM's answer.

        Args:
            query: Free-text vehicle description.

        Returns:
            The ``'text'`` entry of the chain's response.
        """
        candidates = self.search(query)
        chain = self.get_chain()
        # The candidate list is passed as-is; the prompt template stringifies it.
        response = chain.invoke({"query": query, "models": candidates})
        return response['text']


# NOTE(review): this demo call sits at module level, so it also runs when the
# module is imported; kept as-is to preserve existing behavior.
print(IdentV2().ident("PEUGEOT 208 2020 1.2 SIGNATURE 1.2 PURETECH 82 MT KILOMETRAJE 65035 KM TRANSMISION MT PUERTAS 5 ANO 2020 TRACCION 4X2 MOTOR 1200 LT TIPO HATCHBACK COMBUSTIBLE BENCINA COLOR GRIS PLATINUM"))