| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- #!/usr/bin/env python3
- """
- Weaviate kollektsiooni migratsioon (remote -> local)
- """
- import weaviate
- import json
- from typing import Optional
# Connection endpoints and migration settings
REMOTE_WEAVIATE_URL: str = "http://hetzner:9020"  # source instance (exported from)
LOCAL_WEAVIATE_URL: str = "http://localhost:8080"  # destination instance (imported into)
COLLECTION_NAME: str = "ScientificArticles"  # collection (class) to migrate
BATCH_SIZE: int = 100  # number of objects requested per export page
def connect_weaviate(url: str):
    """Create a Weaviate client for ``url`` and verify the instance is ready.

    Args:
        url: Base URL of the Weaviate instance.

    Returns:
        The ``weaviate.Client`` created for ``url``.

    Raises:
        ConnectionError: If ``is_ready()`` reports that the instance is not ready.
    """
    conn = weaviate.Client(url)
    if conn.is_ready():
        return conn
    raise ConnectionError(f"Weaviate pole käimas: {url}")
def get_schema(client, collection_name: str):
    """Fetch the schema definition of a single collection (class).

    Args:
        client: Client object exposing ``schema.get()``, which returns the
            full schema as a dict with a ``"classes"`` list.
        collection_name: Name of the class to look up.

    Returns:
        The first class definition whose ``"class"`` entry equals
        ``collection_name``.

    Raises:
        ValueError: If no class with that name is present in the schema.
    """
    class_defs = client.schema.get().get("classes", [])
    found = next(
        (definition for definition in class_defs
         if definition["class"] == collection_name),
        None,
    )
    if found is None:
        raise ValueError(f"Kollektsiooni {collection_name} pole olemas")
    return found
def export_collection(client, collection_name: str):
    """Export every object of a collection from the remote Weaviate.

    Pages through the collection with the cursor API (``after``), requesting
    ``BATCH_SIZE`` objects per call, until a short or empty page is returned.
    Vectors are requested as well so that the migration can carry the
    embeddings over instead of dropping them.

    Args:
        client: Weaviate (v3-style) client connected to the source instance.
        collection_name: Name of the class whose objects are exported.

    Returns:
        A list with all exported object dicts, in the order they were
        returned (presumably each has "id", "properties" and "vector" keys --
        confirm against the client version in use).
    """
    print(f"📤 Eksporditakse {collection_name} remote'st ({REMOTE_WEAVIATE_URL})...")

    objects = []
    after = None  # cursor: UUID of the last object of the previous page

    while True:
        # `data_object.get` takes `class_name` (not `collection_name`) and has
        # no `where` parameter; listing a class without a filter already
        # returns every object, so the former dummy filter is not needed.
        response = client.data_object.get(
            class_name=collection_name,
            limit=BATCH_SIZE,
            after=after,
            with_vector=True,
        )

        batch = (response or {}).get("objects") or []
        if not batch:
            break

        objects.extend(batch)
        print(f" ✓ Eksportitud {len(objects)} objekti...")

        # A short page means the end of the collection was reached.
        if len(batch) < BATCH_SIZE:
            break
        after = batch[-1]["id"]

    print(f"✅ Kokku eksportitud: {len(objects)} objekti")
    return objects
def import_collection(client, collection_name: str, objects: list):
    """Import exported objects into the local Weaviate, one at a time.

    Each object is created with its original UUID and, when the exported
    object carries one, its original vector, so the embeddings are preserved.
    A failure on one object is reported and counted but does not stop the
    import (best effort).

    Args:
        client: Client object exposing ``data_object.create(...)``.
        collection_name: Name of the class the objects are created in.
        objects: Exported object dicts; "id", "properties" and "vector" are
            read with ``.get`` and may be absent.

    Returns:
        The number of objects created successfully.
    """
    print(f"📥 Importitakse {collection_name} lokaalse Weaviate'sse...")

    total = len(objects)
    success_count = 0
    error_count = 0

    for i, obj in enumerate(objects, 1):
        try:
            client.data_object.create(
                data_object=obj.get("properties", {}),
                class_name=collection_name,
                uuid=obj.get("id"),  # keep the same ID as on the source
                vector=obj.get("vector"),  # None when no vector was exported
            )
            success_count += 1

            if i % 50 == 0:
                print(f" ✓ Importitud {i}/{total} objekti...")
        except Exception as e:
            # Deliberate best effort: report the failure and keep going.
            error_count += 1
            print(f" ❌ Objekt {obj.get('id')}: {str(e)}")

    print(f"✅ Importimine valmis: {success_count} edu, {error_count} viga")
    return success_count
def main():
    """Run the remote -> local migration end to end.

    Connects to both instances, reads the collection schema from the remote,
    creates the collection locally when it is missing, then exports all
    objects from the remote and imports them locally.

    Returns:
        0 on success, 1 if any step raised an exception (the error is printed).
    """
    try:
        # Connect
        print("🔗 Ühendatakse remote Weaviate'ga...")
        remote_client = connect_weaviate(REMOTE_WEAVIATE_URL)

        print("🔗 Ühendatakse lokaalsesse Weaviate'ga...")
        local_client = connect_weaviate(LOCAL_WEAVIATE_URL)

        # Check that the collection exists on the remote
        print(f"📋 Kontrollitakse kollektsiooni {COLLECTION_NAME}...")
        schema = get_schema(remote_client, COLLECTION_NAME)
        print(f" ✓ Leitud: {schema['class']}")

        # Create the local collection with the same schema if it is missing.
        # `schema.get()` does not raise for a missing class, so the class list
        # is inspected explicitly instead of relying on an exception.
        local_classes = local_client.schema.get().get("classes", [])
        if any(cls.get("class") == COLLECTION_NAME for cls in local_classes):
            print("⚠️ Lokaalne kolleektsioon võib juba eksisteerida")
        else:
            print(f"📝 Luuakse lokaalne kolleektsioon {COLLECTION_NAME}...")
            local_client.schema.create_class(schema)

        # Export + import
        objects = export_collection(remote_client, COLLECTION_NAME)
        import_collection(local_client, COLLECTION_NAME, objects)

        print("\n✨ Migratsioon valmis!")

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"❌ Viga: {str(e)}")
        return 1

    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code. SystemExit is
    # raised directly because the `exit` helper is injected by the `site`
    # module for interactive use and is not guaranteed to exist (e.g. -S).
    raise SystemExit(main())
|