ident_v2.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. from langchain.document_loaders.csv_loader import CSVLoader
  2. from langchain_community.vectorstores import Chroma
  3. from langchain_community.embeddings import OllamaEmbeddings
  4. from langchain_community.chat_models import ChatOllama
  5. from langchain.prompts import PromptTemplate
  6. from langchain.chains import LLMChain
  7. from langchain.text_splitter import RecursiveCharacterTextSplitter
  8. class IdentV2:
  9. loader = None
  10. documents = None
  11. embeddings = None
  12. vectorstore = None
  13. llm = None
  14. def __init__(self):
  15. self.loader = CSVLoader(file_path="../csv/bm_res_lite_peugeot.csv", metadata_columns=["modelo_id"])
  16. self.documents = self.loader.load()
  17. print(f"Loaded {len(self.documents)} registers")
  18. self.embeddings = OllamaEmbeddings(model="nomic-embed-text")
  19. chunks = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0).split_documents(self.documents)
  20. self.vectorstore = Chroma.from_documents(chunks, self.embeddings, persist_directory="db/chroma_db")
  21. self.llm = ChatOllama(model_name="mistral", temperature=0)
  22. def search(self, query, k=7):
  23. docs = self.vectorstore.similarity_search(query, k=k)
  24. print(f"Search results: {docs}")
  25. return [(doc.page_content, 'modelo_id: ' + doc.metadata["modelo_id"]) for doc in docs]
  26. def get_chain(self):
  27. template = """
  28. You are a world expert in vehicle appraisal
  29. Below I am going to share with you a partial description of a vehicle model:
  30. {query}
  31. Your job is to try to find the vehicle model that best fits the description from the following list of available models:
  32. {models}
  33. Consider the following comparison criteria in descending order:
  34. - Year
  35. - Brand
  36. - Model
  37. - Transmission
  38. - Variant
  39. - Displacement
  40. - Fuel
  41. - Traction
  42. - Number of doors
  43. - Price (10%)
  44. Explain your comparison criterion and give me a list of the models ordered in descending order that have best fitted, including de 'model_id'.
  45. """
  46. prompt = PromptTemplate(input_variables=["models", "query"], template=template)
  47. chain = LLMChain(llm=self.llm, prompt=prompt)
  48. return chain
  49. def ident(self, query):
  50. results = self.search(query)
  51. chain = self.get_chain()
  52. response = chain.invoke(dict(query=query, models=results))
  53. return response['text']
  54. print(IdentV2().ident("PEUGEOT 208 2020 1.2 SIGNATURE 1.2 PURETECH 82 MT KILOMETRAJE 65035 KM TRANSMISION MT PUERTAS 5 ANO 2020 TRACCION 4X2 MOTOR 1200 LT TIPO HATCHBACK COMBUSTIBLE BENCINA COLOR GRIS PLATINUM"))