#!/usr/bin/env python3
"""Migrate a Weaviate collection from a remote instance to a local one.

The script copies the class definition of ``COLLECTION_NAME`` from the remote
instance, creates it locally when it is missing, and then copies every object
(keeping its UUID and, when available, its vector).

NOTE(review): this uses the ``weaviate.Client`` / ``client.data_object`` API
of the v3 Python client -- confirm the installed client version provides it.
"""

import json
import sys
from typing import Optional

import weaviate

# Connection settings
REMOTE_WEAVIATE_URL = "http://hetzner:9020"
LOCAL_WEAVIATE_URL = "http://localhost:8080"
COLLECTION_NAME = "ScientificArticles"
BATCH_SIZE = 100


def connect_weaviate(url: str):
    """Connect to the Weaviate instance at *url* and return the client.

    Raises:
        ConnectionError: if the instance does not report itself as ready.
    """
    client = weaviate.Client(url)
    if not client.is_ready():
        raise ConnectionError(f"Weaviate pole käimas: {url}")
    return client


def _collection_exists(client, collection_name: str) -> bool:
    """Return True when the client's schema contains a class with this name."""
    schema = client.schema.get()
    return any(
        cls["class"] == collection_name for cls in schema.get("classes", [])
    )


def get_schema(client, collection_name: str) -> dict:
    """Return the class definition of *collection_name* from the schema.

    Raises:
        ValueError: if the schema has no class with that name.
    """
    schema = client.schema.get()
    for cls in schema.get("classes", []):
        if cls["class"] == collection_name:
            return cls
    raise ValueError(f"Kollektsiooni {collection_name} pole olemas")


def export_collection(client, collection_name: str) -> list:
    """Fetch every object of *collection_name* and return them as a list.

    Objects are read in pages of ``BATCH_SIZE`` using the ``after`` cursor
    (the UUID of the last object of the previous page).
    """
    print(f"📤 Eksporditakse {collection_name} remote'st ({REMOTE_WEAVIATE_URL})...")

    objects = []
    after = None

    while True:
        # data_object.get takes ``class_name`` (not ``collection_name``) and
        # has no ``where`` parameter; the previous call raised TypeError.
        # with_vector=True asks for the stored vector so it can be migrated
        # together with the properties.
        response = client.data_object.get(
            class_name=collection_name,
            limit=BATCH_SIZE,
            after=after,
            with_vector=True,
        )

        batch = response.get("objects", []) if response else []
        if not batch:
            break

        objects.extend(batch)
        print(f" ✓ Eksportitud {len(objects)} objekti...")

        # A short page means there is nothing left to fetch.
        if len(batch) < BATCH_SIZE:
            break

        # Continue with the next batch after the last object seen.
        after = batch[-1]["id"]

    print(f"✅ Kokku eksportitud: {len(objects)} objekti")
    return objects


def import_collection(client, collection_name: str, objects: list) -> int:
    """Create *objects* in *collection_name* and return the success count.

    Each object is created with its original UUID and, when the exported
    object carries one, its original vector. A failure on one object is
    reported and counted, and the import continues with the next object.
    """
    print(f"📥 Importitakse {collection_name} lokaalse Weaviate'sse...")

    success_count = 0
    error_count = 0

    for i, obj in enumerate(objects, 1):
        try:
            client.data_object.create(
                data_object=obj.get("properties", {}),
                class_name=collection_name,
                uuid=obj.get("id"),  # keep the same ID as on the remote
                vector=obj.get("vector"),  # None when no vector was exported
            )
            success_count += 1

            if i % 50 == 0:
                print(f" ✓ Importitud {i}/{len(objects)} objekti...")

        except Exception as e:
            # Deliberately best-effort: one bad object must not abort the run.
            error_count += 1
            print(f" ❌ Objekt {obj.get('id')}: {str(e)}")

    print(f"✅ Importimine valmis: {success_count} edu, {error_count} viga")
    return success_count


def main() -> int:
    """Run the migration; return 0 on full success and 1 otherwise."""
    try:
        # Connect
        print("🔗 Ühendatakse remote Weaviate'ga...")
        remote_client = connect_weaviate(REMOTE_WEAVIATE_URL)

        print("🔗 Ühendatakse lokaalsesse Weaviate'ga...")
        local_client = connect_weaviate(LOCAL_WEAVIATE_URL)

        # Check that the collection exists on the remote
        print(f"📋 Kontrollitakse kollektsiooni {COLLECTION_NAME}...")
        schema = get_schema(remote_client, COLLECTION_NAME)
        print(f" ✓ Leitud: {schema['class']}")

        # Create the local collection with the same schema if it is missing.
        # schema.get() does not raise for a missing class, so existence has
        # to be checked explicitly rather than via try/except.
        if _collection_exists(local_client, COLLECTION_NAME):
            print("⚠️ Lokaalne kolleektsioon võib juba eksisteerida")
        else:
            print(f"📝 Luuakse lokaalne kolleektsioon {COLLECTION_NAME}...")
            local_client.schema.create_class(schema)

        # Export + import
        objects = export_collection(remote_client, COLLECTION_NAME)
        imported = import_collection(local_client, COLLECTION_NAME, objects)

        # import_collection has already printed the per-object errors; make
        # sure a partial migration is also visible in the exit status.
        if imported != len(objects):
            return 1

        print("\n✨ Migratsioon valmis!")

    except Exception as e:
        print(f"❌ Viga: {str(e)}")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())