# migrate_weaviate.py (3.9 KB)
  1. #!/usr/bin/env python3
  2. """
  3. Weaviate kollektsiooni migratsioon (remote -> local)
  4. """
  5. import weaviate
  6. import json
  7. from typing import Optional
# Connection endpoints and migration settings
REMOTE_WEAVIATE_URL = "http://hetzner:9020"  # source instance the objects are exported from
LOCAL_WEAVIATE_URL = "http://localhost:8080"  # target instance the objects are imported into
COLLECTION_NAME = "ScientificArticles"  # name of the class (collection) to migrate
BATCH_SIZE = 100  # page size passed as `limit` while exporting
  13. def connect_weaviate(url: str):
  14. """Ühenda Weaviate'ga"""
  15. client = weaviate.Client(url)
  16. if not client.is_ready():
  17. raise ConnectionError(f"Weaviate pole käimas: {url}")
  18. return client
  19. def get_schema(client, collection_name: str):
  20. """Tõmba kollektsiooni schema"""
  21. schema = client.schema.get()
  22. for cls in schema.get("classes", []):
  23. if cls["class"] == collection_name:
  24. return cls
  25. raise ValueError(f"Kollektsiooni {collection_name} pole olemas")
  26. def export_collection(client, collection_name: str):
  27. """Ekspordi kõik objektid Remote Weaviate'st"""
  28. print(f"📤 Eksporditakse {collection_name} remote'st ({REMOTE_WEAVIATE_URL})...")
  29. where_filter = {
  30. "path": ["__typename"],
  31. "operator": "NotEqual",
  32. "valueString": ""
  33. }
  34. objects = []
  35. after = None
  36. while True:
  37. response = client.data_object.get(
  38. collection_name=collection_name,
  39. limit=BATCH_SIZE,
  40. after=after,
  41. where=where_filter
  42. )
  43. batch = response.get("objects", [])
  44. if not batch:
  45. break
  46. objects.extend(batch)
  47. print(f" ✓ Eksportitud {len(objects)} objekti...")
  48. # Jätka järgmiselt batshi
  49. if len(batch) < BATCH_SIZE:
  50. break
  51. after = batch[-1]["id"]
  52. print(f"✅ Kokku eksportitud: {len(objects)} objekti")
  53. return objects
  54. def import_collection(client, collection_name: str, objects: list):
  55. """Importi objektid lokaalse Weaviate'sse"""
  56. print(f"📥 Importitakse {collection_name} lokaalse Weaviate'sse...")
  57. success_count = 0
  58. error_count = 0
  59. for i, obj in enumerate(objects, 1):
  60. try:
  61. # Eemalda ID, et Weaviate saaks luua uue
  62. obj_data = obj.copy()
  63. obj_id = obj_data.pop("id", None)
  64. client.data_object.create(
  65. data_object=obj_data.get("properties", {}),
  66. class_name=collection_name,
  67. uuid=obj_id # Kasuta sama ID
  68. )
  69. success_count += 1
  70. if i % 50 == 0:
  71. print(f" ✓ Importitud {i}/{len(objects)} objekti...")
  72. except Exception as e:
  73. error_count += 1
  74. print(f" ❌ Objekt {obj.get('id')}: {str(e)}")
  75. print(f"✅ Importimine valmis: {success_count} edu, {error_count} viga")
  76. return success_count
  77. def main():
  78. try:
  79. # Ühenda
  80. print("🔗 Ühendatakse remote Weaviate'ga...")
  81. remote_client = connect_weaviate(REMOTE_WEAVIATE_URL)
  82. print("🔗 Ühendatakse lokaalsesse Weaviate'ga...")
  83. local_client = connect_weaviate(LOCAL_WEAVIATE_URL)
  84. # Kontrolli kollektsiooni
  85. print(f"📋 Kontrollitakse kollektsiooni {COLLECTION_NAME}...")
  86. schema = get_schema(remote_client, COLLECTION_NAME)
  87. print(f" ✓ Leitud: {schema['class']}")
  88. # Loo kohalik kolleektsioon sama schemaga
  89. try:
  90. local_client.schema.get()
  91. print("⚠️ Lokaalne kolleektsioon võib juba eksisteerida")
  92. except:
  93. print(f"📝 Luuakse lokaalne kolleektsioon {COLLECTION_NAME}...")
  94. local_client.schema.create_class(schema)
  95. # Ekspordi + Importi
  96. objects = export_collection(remote_client, COLLECTION_NAME)
  97. import_collection(local_client, COLLECTION_NAME, objects)
  98. print("\n✨ Migratsioon valmis!")
  99. except Exception as e:
  100. print(f"❌ Viga: {str(e)}")
  101. return 1
  102. return 0
  103. if __name__ == "__main__":
  104. exit(main())