Ver código fonte

Requirements actualizado

Pablo Barrera Yaksic 1 mês atrás
pai
commit
8875b63d13
4 arquivos alterados com 113 adições e 212 exclusões
  1. BIN
      ident/__pycache__/ident.cpython-311.pyc
  2. 2 2
      ident/ident.py
  3. 1 1
      ident/main.py
  4. 110 209
      requirements.txt

BIN
ident/__pycache__/ident.cpython-311.pyc


+ 2 - 2
ident/ident.py

@@ -77,10 +77,10 @@ class Ident:
         identified_fields = identified_fields.replace(", ", "\n")
         print(f"New query: {identified_fields}")
 
-        text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+        text_splitter = CharacterTextSplitter()
         docs = text_splitter.split_documents(documents)
         # print(documents)
-        self.vectorstore = Chroma.from_documents(docs, embedding=OllamaEmbeddings(model="nomic-embed-text"))
+        self.vectorstore = Chroma.from_documents(docs, embedding=OllamaEmbeddings(model="mxbai-embed-large"))
         # print(f"Response (2): {self.vectorstore.similarity_search_with_score(identified_fields)}")
         self.retriever = self.vectorstore.as_retriever()
 

+ 1 - 1
ident/main.py

@@ -1,5 +1,5 @@
 from ident import Ident
 
 ident = Ident()
-response = ident.ask("KIA SONET 2022 SONET EX 1.5L 6MT ACC FULL PLUS KILOMETRAJE 10945 KM TRANSMISION MT PUERTAS 5 ANO 2022 TRACCION 4X2 MOTOR 1500 LT TIPO SUV COMBUSTIBLE BENCINA COLOR GRIS GRAVEDAD")
+response = ident.ask("TOYOTA RUSH 2018 4x2 MT BENCINA")
 print(response)

+ 110 - 209
requirements.txt

@@ -1,209 +1,110 @@
-aiohttp
-aiosignal
-altair
-annotated-types
-antlr4-python3-runtime
-anyio
-asgiref
-assemblyai
-async-timeout
-attrs
-backoff
-bcrypt
-beautifulsoup4
-blinker
-build
-cachetools
-certifi
-cffi
-chardet
-charset-normalizer
-chroma-hnswlib
-chromadb
-click
-coloredlogs
-contourpy
-cryptography
-cycler
-dataclasses-json
-dataclasses-json-speakeasy
-Deprecated
-distro
-effdet
-emoji
-exceptiongroup
-faiss-cpu
-fastapi
-filelock
-filetype
-flatbuffers
-fonttools
-frozenlist
-fsspec
-gitdb
-GitPython
-google-auth
-googleapis-common-protos
-graphlib-backport
-greenlet
-grpcio
-h11
-httpcore
-httptools
-httpx
-huggingface-hub
-humanfriendly
-idna
-importlib-metadata
-importlib_resources
-iopath
-Jinja2
-joblib
-jsonpatch
-jsonpath-python
-jsonpointer
-jsonschema
-jsonschema-specifications
-kiwisolver
-kubernetes
-langchain
-langchain-community
-langchain-core
-langchain-openai
-langdetect
-langsmith
-layoutparser
-lxml
-markdown-it-py
-MarkupSafe
-marshmallow
-matplotlib
-mdurl
-mmh3
-monotonic
-mpmath
-multidict
-mypy-extensions
-mysql
-mysqlclient
-networkx
-nltk
-numpy
-nvidia-cublas-cu12
-nvidia-cuda-cupti-cu12
-nvidia-cuda-nvrtc-cu12
-nvidia-cuda-runtime-cu12
-nvidia-cudnn-cu12
-nvidia-cufft-cu12
-nvidia-curand-cu12
-nvidia-cusolver-cu12
-nvidia-cusparse-cu12
-nvidia-nccl-cu12
-nvidia-nvjitlink-cu12
-nvidia-nvtx-cu12
-oauthlib
-omegaconf
-onnx
-onnxruntime
-openai
-opencv-python
-opentelemetry-api
-opentelemetry-exporter-otlp-proto-common
-opentelemetry-exporter-otlp-proto-grpc
-opentelemetry-instrumentation
-opentelemetry-instrumentation-asgi
-opentelemetry-instrumentation-fastapi
-opentelemetry-proto
-opentelemetry-sdk
-opentelemetry-semantic-conventions
-opentelemetry-util-http
-orjson
-overrides
-packaging
-pandas
-pdf2image
-pdfminer.six
-pdfplumber
-pikepdf
-pillow
-pillow_heif
-portalocker
-posthog
-protobuf
-pulsar-client
-pyarrow
-pyasn1
-pyasn1-modules
-pycocotools
-pycparser
-pycryptodome
-pydantic
-pydantic_core
-pydeck
-Pygments
-pyparsing
-pypdf
-pypdfium2
-PyPika
-pyproject_hooks
-pytesseract
-python-dateutil
-python-dotenv
-python-iso639
-python-magic
-python-multipart
-pytz
-PyYAML
-rapidfuzz
-referencing
-regex
-requests
-requests-oauthlib
-rich
-rpds-py
-rsa
-safetensors
-scipy
-six
-smmap
-sniffio
-soupsieve
-SQLAlchemy
-starlette
-streamlit
-streamlit-chat
-sympy
-tabulate
-tenacity
-tiktoken
-timm
-tokenizers
-toml
-tomli
-toolz
-torch
-torchvision
-tornado
-tqdm
-transformers
-triton
-typer
-typing-inspect
-typing_extensions
-tzdata
-tzlocal
-unstructured
-unstructured-client
-unstructured-inference
-unstructured.pytesseract
-urllib3
-uvicorn
-uvloop
-validators
-watchdog
-watchfiles
-websocket-client
-websockets
-wrapt
-yarl
-zipp
+attrs==23.2.1.dev0
+autocommand==2.2.2
+awscli==2.18.1
+awscrt==0.21.5
+Babel==2.15.0
+bcrypt==4.1.3
+Brlapi==0.8.5
+btrfsutil==6.10
+cached-property==1.5.2
+certifi==2024.8.30
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+configobj==5.0.8
+coverage==7.6.1
+cryptography==42.0.8
+dbus-python==1.3.2
+decorator==5.1.1
+distlib==0.3.8
+distro==1.9.0
+dnspython==2.6.1
+docutils==0.21.2
+fastjsonschema==2.20.0
+filelock==3.13.3
+gufw==24.4.0
+h11==0.14.0
+idna==3.8
+inflect==7.3.1
+isodate==0.6.1
+jaraco.classes==3.4.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.2
+jaraco.text==4.0.0
+jeepney==0.8.0
+Jinja2==3.1.4
+jmespath==1.0.1
+keyring==25.2.1
+lensfun==0.3.4
+libfdt==1.7.1
+libvirt-python==10.7.0
+louis==3.31.0
+lxml==5.3.0
+m64py==0.2.5
+Mako==1.3.5.dev0
+Markdown==3.7
+MarkupSafe==2.1.5
+more-itertools==10.3.0
+netsnmp-python==1.0a1
+ordered-set==4.1.0
+outcome==1.3.0.post0
+packaging==24.1
+platformdirs==4.3.6
+prompt_toolkit==3.0.48
+protonvpn-cli==3.13.0
+protonvpn-nm-lib==3.16.0
+psutil==6.0.0
+pwquality==1.4.5
+pycairo==1.27.0
+pycountry==24.6.1
+pycparser==2.22
+pycryptodome==3.21.0
+Pygments==2.18.0
+PyGObject==3.50.0
+pyOpenSSL==24.2.1
+pyperf==2.6.3
+PyQt5==5.15.11
+PyQt5_sip==12.15.0
+PySocks==1.7.1
+pyte==0.8.2
+python-dateutil==2.9.0
+python-gnupg==0.5.3
+python-linux-procfs==0.7.3
+pythondialog==3.5.3
+pytz==2024.2
+pyudev==0.24.3
+pyxdg==0.28
+Reflector==2023.6.28.0.36.1
+requests==2.32.3
+ruamel.yaml==0.18.6
+ruamel.yaml.clib==0.2.8
+SecretStorage==3.3.3
+setproctitle==1.3.3
+setuptools==69.5.1
+six==1.16.0
+snappergui==0.1
+sniffio==1.3.1
+sortedcontainers==2.4.0
+streamlink==6.11.0
+systemd-python==235
+TBB==0.2
+thefuck==3.32
+tomli==2.0.1
+trio==0.26.2
+trio-websocket==0.11.1
+trove-classifiers==2024.9.17
+typeguard==4.3.0
+typing_extensions==4.12.2
+ufw==0.36.2
+Unidecode==1.3.8
+urllib3==1.26.20
+validate==5.0.8
+validate-pyproject==0.20.2
+vdf==3.5
+virtualenv==20.26.2
+wcwidth==0.2.13
+websocket-client==1.8.0
+wheel==0.44.0
+wsproto==1.2.0