migrate_weaviate_rest.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. #!/usr/bin/env python3
  2. """
  3. Weaviate migratsioon REST API abil - ainult ScientificArticle
  4. Eemalda vectorizer konfig, et kohalik Weaviate saaks skeem luua
  5. """
  6. import requests
  7. import json
  8. import copy
  9. REMOTE_URL = "http://hetzner:9020"
  10. LOCAL_URL = "http://localhost:8080"
  11. COLLECTION_NAME = "ScientificArticle"
  12. def clean_schema(schema: dict):
  13. """Eemalda vectorizer/module references"""
  14. cleaned = copy.deepcopy(schema)
  15. # Eemalda vectorizer konfid
  16. cleaned.pop("vectorizer", None)
  17. cleaned.pop("vectorizerConfig", None)
  18. cleaned.pop("vectorIndexConfig", None)
  19. cleaned.pop("vector_config", None)
  20. cleaned.pop("vectorIndexType", None)
  21. # Eemalda propertytest vectorizer infod
  22. for prop in cleaned.get("properties", []):
  23. prop.pop("vectorizer", None)
  24. prop.pop("vectorizerConfig", None)
  25. prop.pop("vectorizer_configs", None)
  26. return cleaned
  27. def get_collection_schema(url: str, collection: str):
  28. """Tõmba kollektsiooni schema"""
  29. print(f"📋 Schema: {collection}...")
  30. try:
  31. resp = requests.get(f"{url}/v1/schema", timeout=10)
  32. schema = resp.json()
  33. for cls in schema.get("classes", []):
  34. if cls["class"] == collection:
  35. print(f" ✓ Schema leitud")
  36. return cls
  37. return None
  38. except Exception as e:
  39. print(f" ❌ Viga: {e}")
  40. return None
  41. def get_objects(url: str, collection: str):
  42. """Tõmba kõik objektid REST API abil"""
  43. print(f"\n📤 Eksporditakse {collection}...")
  44. objects = []
  45. offset = 0
  46. limit = 100
  47. while True:
  48. try:
  49. resp = requests.get(
  50. f"{url}/v1/objects",
  51. params={
  52. "class": collection,
  53. "limit": limit,
  54. "offset": offset
  55. },
  56. timeout=30
  57. )
  58. if resp.status_code != 200:
  59. print(f" ⚠️ Viga: {resp.status_code}")
  60. break
  61. data = resp.json()
  62. items = data.get("objects", [])
  63. if not items:
  64. break
  65. objects.extend(items)
  66. offset += len(items)
  67. print(f" ✓ Tõmmatud: {offset} objekti...")
  68. if len(items) < limit:
  69. break
  70. except Exception as e:
  71. print(f" ❌ Viga: {e}")
  72. break
  73. print(f"✅ Eksportitud kokku: {len(objects)} objekti\n")
  74. return objects
  75. def create_collection_local(url: str, schema: dict):
  76. """Loo kolleektsioon lokaalse Weaviate'sse"""
  77. collection_name = schema["class"]
  78. print(f"📝 Kontrollitakse lokaalne kolleektsioon '{collection_name}'...")
  79. try:
  80. # Kontrolli kas juba eksisteerib
  81. resp = requests.get(f"{url}/v1/schema", timeout=10)
  82. existing = resp.json()
  83. for cls in existing.get("classes", []):
  84. if cls["class"] == collection_name:
  85. print(f" ✓ Kolleektsioon '{collection_name}' juba eksisteerib\n")
  86. return True
  87. # Puhasta schema (eemalda vectorizer infod)
  88. clean = clean_schema(schema)
  89. # Loo uus
  90. print(f" 📝 Luuakse uus kolleektsioon (ilma vectorizer'ita)...")
  91. resp = requests.post(
  92. f"{url}/v1/schema",
  93. json=clean,
  94. timeout=10
  95. )
  96. if resp.status_code in [200, 201]:
  97. print(f" ✓ Kolleektsioon '{collection_name}' loodud\n")
  98. return True
  99. else:
  100. print(f" ❌ Viga: {resp.status_code} - {resp.text[:200]}\n")
  101. return False
  102. except Exception as e:
  103. print(f" ❌ Viga: {e}\n")
  104. return False
  105. def import_objects_rest(url: str, collection: str, objects: list):
  106. """Importi objektid REST API abil"""
  107. print(f"📥 Importitakse {len(objects)} objekti...")
  108. success = 0
  109. errors = 0
  110. for i, obj in enumerate(objects, 1):
  111. try:
  112. obj_id = obj.get("id")
  113. vector = obj.get("vector")
  114. properties = obj.get("properties", {})
  115. payload = {
  116. "class": collection,
  117. "id": obj_id,
  118. "properties": properties
  119. }
  120. if vector:
  121. payload["vector"] = vector
  122. resp = requests.post(
  123. f"{url}/v1/objects",
  124. json=payload,
  125. timeout=10
  126. )
  127. if resp.status_code in [200, 201]:
  128. success += 1
  129. else:
  130. errors += 1
  131. if errors <= 5:
  132. print(f" ⚠️ Objekt {i}: {resp.status_code}")
  133. if i % 50 == 0:
  134. print(f" ✓ Importitud {i}/{len(objects)}...")
  135. except Exception as e:
  136. errors += 1
  137. if errors <= 5:
  138. print(f" ⚠️ Viga {i}: {e}")
  139. print(f"\n✅ Import valmis: {success} edu, {errors} viga\n")
  140. return success
  141. def main():
  142. print(f"\n{'='*60}")
  143. print(f"WEAVIATE MIGRATSIOON: {COLLECTION_NAME}")
  144. print(f"{'='*60}\n")
  145. print("🔗 Kontrollitakse ühendusi...")
  146. try:
  147. resp = requests.get(f"{REMOTE_URL}/v1/meta", timeout=5)
  148. print(f" ✓ Remote ({REMOTE_URL}): {resp.json().get('version', 'OK')}")
  149. except Exception as e:
  150. print(f" ❌ Remote viga: {e}")
  151. return 1
  152. try:
  153. resp = requests.get(f"{LOCAL_URL}/v1/meta", timeout=5)
  154. print(f" ✓ Lokaalne ({LOCAL_URL}): {resp.json().get('version', 'OK')}\n")
  155. except Exception as e:
  156. print(f" ❌ Lokaalne viga: {e}")
  157. return 1
  158. schema = get_collection_schema(REMOTE_URL, COLLECTION_NAME)
  159. if not schema:
  160. print(f"❌ Kollektsiooni '{COLLECTION_NAME}' remote'l pole\n")
  161. return 1
  162. if not create_collection_local(LOCAL_URL, schema):
  163. print(f"❌ Lokaalne kolleektsioon ei loodud\n")
  164. return 1
  165. objects = get_objects(REMOTE_URL, COLLECTION_NAME)
  166. if not objects:
  167. print(f"⚠️ Kollektsioonis '{COLLECTION_NAME}' pole objekte\n")
  168. return 0
  169. import_objects_rest(LOCAL_URL, COLLECTION_NAME, objects)
  170. print(f"{'='*60}")
  171. print("✨ Migratsioon valmis!")
  172. print(f"{'='*60}\n")
  173. return 0
  174. if __name__ == "__main__":
  175. exit(main())