|
|
@@ -1,4 +1,6 @@
|
|
|
import weaviate
|
|
|
+from weaviate.classes.config import DataType, Property, Configure, VectorDistances
|
|
|
+from weaviate.classes.query import Filter
|
|
|
import hashlib
|
|
|
import json
|
|
|
from typing import Dict, List, Optional, Any
|
|
|
@@ -42,7 +44,7 @@ class WeaviateClient:
|
|
|
# Kui on API võti
|
|
|
auth_credentials = weaviate.auth.AuthApiKey(config.weaviate_api_key)
|
|
|
client = weaviate.WeaviateClient(
|
|
|
- connection_params=weaviate.ConnectionParams.from_params(
|
|
|
+ connection_params=weaviate.connect.ConnectionParams.from_params(
|
|
|
http_host=host,
|
|
|
http_port=port,
|
|
|
http_secure=secure,
|
|
|
@@ -55,7 +57,7 @@ class WeaviateClient:
|
|
|
else:
|
|
|
# Ilma autentimiseta
|
|
|
client = weaviate.WeaviateClient(
|
|
|
- connection_params=weaviate.ConnectionParams.from_params(
|
|
|
+ connection_params=weaviate.connect.ConnectionParams.from_params(
|
|
|
http_host=host,
|
|
|
http_port=port,
|
|
|
http_secure=secure,
|
|
|
@@ -127,88 +129,84 @@ class WeaviateClient:
|
|
|
self.client.collections.create(
|
|
|
name=self.class_name,
|
|
|
# Kasutame oma embeddinguid
|
|
|
- vectorizer_config=weaviate.classes.config.Configure.Vectorizer.none(),
|
|
|
+ vector_config=Configure.Vector.none(),
|
|
|
properties=[
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="article_id",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Artikli unikaalne ID"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="title",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Artikli pealkiri"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="authors",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT_ARRAY,
|
|
|
+ data_type=DataType.TEXT_ARRAY,
|
|
|
description="Artikli autorid"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="year",
|
|
|
- data_type=weaviate.classes.data.DataType.INT,
|
|
|
+ data_type=DataType.INT,
|
|
|
description="Avaldamisaasta"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="journal",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Žurnaal"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="doi",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="DOI identifikaator"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="abstract_en",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Inglise keelne abstrakt"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="summary_et",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Eesti keelne kokkuvõte"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="key_concepts",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT_ARRAY,
|
|
|
+ data_type=DataType.TEXT_ARRAY,
|
|
|
description="Võtmesõnad ja mõisted"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="methods_used",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT_ARRAY,
|
|
|
+ data_type=DataType.TEXT_ARRAY,
|
|
|
description="Kasutatud meetodid"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="transport_context",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Transpordi konteksti analüüs"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="relevance_score",
|
|
|
- data_type=weaviate.classes.data.DataType.INT,
|
|
|
+ data_type=DataType.INT,
|
|
|
description="Relevantsus skoor 1-10"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="processing_date",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Töötlemise kuupäev"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="source_file",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Algne PDF fail"
|
|
|
),
|
|
|
- weaviate.classes.config.Property(
|
|
|
+ Property(
|
|
|
name="file_hash",
|
|
|
- data_type=weaviate.classes.data.DataType.TEXT,
|
|
|
+ data_type=DataType.TEXT,
|
|
|
description="Faili hash duplikaatide kontrolliks"
|
|
|
)
|
|
|
- ],
|
|
|
- # Vector index seaded
|
|
|
- vector_index_config=weaviate.classes.config.Configure.VectorIndex.hnsw(
|
|
|
- distance_metric=weaviate.classes.config.VectorDistances.COSINE
|
|
|
- )
|
|
|
+ ]
|
|
|
)
|
|
|
|
|
|
self.logger.info(f"Loodi klass: {self.class_name}")
|
|
|
@@ -241,7 +239,7 @@ class WeaviateClient:
|
|
|
collection = self.client.collections.get(self.class_name)
|
|
|
|
|
|
response = collection.query.fetch_objects(
|
|
|
- filters=weaviate.classes.query.Filter.by_property("article_id").equal(article_id),
|
|
|
+ filters=Filter.by_property("article_id").equal(article_id),
|
|
|
limit=1
|
|
|
)
|
|
|
|