{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4eda7add",
   "metadata": {},
   "source": [
    "# Weaviate"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4956326d",
   "metadata": {},
   "source": [
    "## Ühenduse loomine Weaviate serveriga"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2dbe9e7a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ardo/rag-demo/.venv/lib/python3.12/site-packages/weaviate/__init__.py:143: DeprecationWarning: Dep010: Importing ConnectionParams from weaviate is deprecated. Import ConnectionParams from its module: weaviate.connect\n",
      " _Warnings.root_module_import(name, map_[name])\n"
     ]
    }
   ],
   "source": [
    "from weaviate_export_import_clean import WeaviateExportImport"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ac2f77bf",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-12-29 18:12:10,042 - INFO - HTTP Request: GET http://hetzner:9020/v1/.well-known/openid-configuration \"HTTP/1.1 404 Not Found\"\n",
      "2025-12-29 18:12:10,390 - INFO - HTTP Request: GET http://hetzner:9020/v1/meta \"HTTP/1.1 200 OK\"\n",
      "2025-12-29 18:12:10,768 - INFO - HTTP Request: GET https://pypi.org/pypi/weaviate-client/json \"HTTP/1.1 200 OK\"\n",
      "2025-12-29 18:12:10,835 - INFO - Ühendatud Weaviate'ga: hetzner:9020\n"
     ]
    }
   ],
   "source": [
    "# Connect to the source Weaviate server\n",
    "SRC_HOST = \"hetzner\"\n",
    "SRC_HTTP_PORT = 9020\n",
    "\n",
    "src_client = WeaviateExportImport.create_client(SRC_HOST, http_port=SRC_HTTP_PORT)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49994031",
   "metadata": {},
   "source": [
    "## Muud käsud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9201cb41",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-12-29 18:18:52,532 - INFO - HTTP Request: GET http://hetzner:9020/v1/schema \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'RAGDocuments': _CollectionConfigSimple(name='RAGDocuments', description=\"RAG kollektsioon Ollama embedding'utega\", generative_config=None, 
properties=[_Property(name='title', description='Dokumendi pealkiri', data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-ollama': _PropertyVectorizerConfig(skip=False, vectorize_property_name=False)}), _Property(name='content', description='Dokumendi sisu', data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-ollama': _PropertyVectorizerConfig(skip=False, vectorize_property_name=False)}), _Property(name='source', description='Dokumendi allikas', data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-ollama': _PropertyVectorizerConfig(skip=False, vectorize_property_name=False)}), _Property(name='category', description='Dokumendi kategooria', data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-ollama': _PropertyVectorizerConfig(skip=False, vectorize_property_name=False)})], references=[], reranker_config=None, vectorizer_config=None, vectorizer=None, vector_config={'default': _NamedVectorConfig(vectorizer=_NamedVectorizerConfig(vectorizer=, model={'apiEndpoint': 'http://ollama:11434', 'model': 'llama3.2:1b', 'vectorizeClassName': True}, source_properties=None), vector_index_config=_VectorIndexConfigHNSW(multi_vector=None, quantizer=None, cleanup_interval_seconds=300, distance_metric=, dynamic_ef_min=100, dynamic_ef_max=500, dynamic_ef_factor=8, ef=-1, ef_construction=128, filter_strategy=, flat_search_cutoff=40000, max_connections=32, skip=False, vector_cache_max_objects=1000000000000))}, 
object_ttl_config=None),\n", " 'ResearchArticles': _CollectionConfigSimple(name='ResearchArticles', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", generative_config=None, properties=[_Property(name='document_type', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='source_file', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='char_count', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='text', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='word_count', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", 
data_type=, index_filterable=True, index_range_filters=False, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='chunk_id', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='total_pages', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None), _Property(name='chunk_hash', description=\"This property was generated by Weaviate's auto-schema feature on Sun Dec 28 18:14:09 2025\", data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-ollama', vectorizer_configs=None)], references=[], reranker_config=None, vectorizer_config=_VectorizerConfig(vectorizer=, model={}, vectorize_collection_name=False), vectorizer=, vector_config=None, object_ttl_config=None),\n", " 'TestArticles': _CollectionConfigSimple(name='TestArticles', description=None, generative_config=None, properties=[_Property(name='title', description=None, data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=None, 
vectorizer=None, vectorizer_configs={'text2vec-ollama': _PropertyVectorizerConfig(skip=False, vectorize_property_name=False)}), _Property(name='content', description=None, data_type=, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-ollama': _PropertyVectorizerConfig(skip=False, vectorize_property_name=False)})], references=[], reranker_config=None, vectorizer_config=None, vectorizer=None, vector_config={'default': _NamedVectorConfig(vectorizer=_NamedVectorizerConfig(vectorizer=, model={'apiEndpoint': 'http://ollama:11434', 'model': 'llama3.2:1b', 'vectorizeClassName': True}, source_properties=None), vector_index_config=_VectorIndexConfigHNSW(multi_vector=None, quantizer=None, cleanup_interval_seconds=300, distance_metric=, dynamic_ef_min=100, dynamic_ef_max=500, dynamic_ef_factor=8, ef=-1, ef_construction=128, filter_strategy=, flat_search_cutoff=40000, max_connections=32, skip=False, vector_cache_max_objects=1000000000000))}, object_ttl_config=None)}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List every collection on the source server\n",
    "src_client.collections.list_all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "93b0b0e8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-12-29 18:18:48,750 - INFO - HTTP Request: DELETE http://hetzner:9020/v1/schema/ScientificArticle \"HTTP/1.1 200 OK\"\n",
      "2025-12-29 18:18:48,786 - INFO - HTTP Request: DELETE http://hetzner:9020/v1/schema/ScientificArticles \"HTTP/1.1 200 OK\"\n"
     ]
    }
   ],
   "source": [
    "# Destructive: drops these collections on the source server\n",
    "src_client.collections.delete(\"ScientificArticle\")\n",
    "src_client.collections.delete(\"ScientificArticles\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f0c9b1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connect to the destination Weaviate server used by the comparison cells below.\n",
    "# TODO: the values below are placeholders - point them at the real destination.\n",
    "DST_HOST = \"localhost\"\n",
    "DST_HTTP_PORT = 8080\n",
    "\n",
    "dst_client = WeaviateExportImport.create_client(DST_HOST, http_port=DST_HTTP_PORT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a3d2c48",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_with_vectors(client, collection_name, limit=1):\n",
    "    \"\"\"Fetch up to `limit` objects, vectors included, from one collection.\n",
    "\n",
    "    Args:\n",
    "        client: connected Weaviate client to query.\n",
    "        collection_name: name of the collection to read from.\n",
    "        limit: maximum number of objects to request.\n",
    "\n",
    "    Returns:\n",
    "        The result of `query.fetch_objects(limit=..., include_vector=True)`.\n",
    "    \"\"\"\n",
    "    collection = client.collections.get(collection_name)\n",
    "    return collection.query.fetch_objects(limit=limit, include_vector=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac436417",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the vector of the second object in the destination \"Article\" collection\n",
    "results = fetch_with_vectors(dst_client, \"Article\", limit=2)\n",
    "\n",
    "# Guard the index: fewer than two objects may come back\n",
    "if len(results.objects) > 1:\n",
    "    print(results.objects[1].vector)\n",
    "else:\n",
    "    print(f\"Expected 2 objects, got {len(results.objects)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3a7bbbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare one \"Article\" object (with vector) between source and destination\n",
    "results_src = fetch_with_vectors(src_client, \"Article\")\n",
    "results_dst = fetch_with_vectors(dst_client, \"Article\")\n",
    "\n",
    "print(results_src)\n",
    "print(results_dst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ccfa014",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare one \"DocIngest\" object (with vector) between source and destination\n",
    "results_src = fetch_with_vectors(src_client, \"DocIngest\")\n",
    "results_dst = fetch_with_vectors(dst_client, \"DocIngest\")\n",
    "\n",
    "print(results_src)\n",
    "print(results_dst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c36921eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the \"Article\" collection configuration on both servers\n",
    "schema_src = src_client.collections.get(\"Article\").config.get()\n",
    "schema_dst = dst_client.collections.get(\"Article\").config.get()\n",
    "\n",
    "print(schema_src)\n",
    "print(schema_dst)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86adec54",
   "metadata": {},
   "source": [
    "## Sulge ühendused"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e654ae89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close every client opened in this notebook\n",
    "src_client.close()\n",
    "dst_client.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (RAG)",
   "language": "python",
   "name": "myproject"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}