#!/usr/bin/env python3
"""Migrate a single Weaviate collection (ScientificArticle) over the REST API.

The collection definition is copied from the remote instance with its
vectorizer/module settings stripped, so that a local Weaviate without those
modules can create it. Objects are then copied one by one together with
their stored vectors.
"""

import copy
import json

import requests

REMOTE_URL = "http://hetzner:9020"
LOCAL_URL = "http://localhost:8080"
COLLECTION_NAME = "ScientificArticle"

# Class-level keys dropped before the schema is re-created locally.
# "moduleConfig" is the key the REST schema uses for module settings; the
# remaining names are kept from the original script for compatibility.
_CLASS_VECTOR_KEYS = (
    "vectorizer",
    "vectorizerConfig",
    "vectorIndexConfig",
    "vector_config",
    "vectorIndexType",
    "moduleConfig",
)

# Property-level keys dropped before the schema is re-created locally.
_PROPERTY_VECTOR_KEYS = (
    "vectorizer",
    "vectorizerConfig",
    "vectorizer_configs",
    "moduleConfig",
)


def clean_schema(schema: dict):
    """Return a copy of *schema* without vectorizer/module references.

    The input is not modified. The returned class definition has
    ``"vectorizer": "none"`` set explicitly so the target instance does not
    fall back to its own default vectorizer module.

    Args:
        schema: A class definition as returned by ``GET /v1/schema``.

    Returns:
        A deep copy of the definition with the vector/module keys removed
        from the class and from each of its properties.
    """
    cleaned = copy.deepcopy(schema)

    for key in _CLASS_VECTOR_KEYS:
        cleaned.pop(key, None)
    cleaned["vectorizer"] = "none"

    # "properties" may be missing or null; treat both as "no properties".
    for prop in cleaned.get("properties") or []:
        for key in _PROPERTY_VECTOR_KEYS:
            prop.pop(key, None)

    return cleaned


def get_collection_schema(url: str, collection: str):
    """Fetch the class definition of *collection* from the instance at *url*.

    Returns:
        The class definition dict, or ``None`` when the request fails or the
        collection is not present in the schema.
    """
    print(f"📋 Schema: {collection}...")

    try:
        resp = requests.get(f"{url}/v1/schema", timeout=10)
        resp.raise_for_status()
        schema = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f" ❌ Viga: {e}")
        return None

    for cls in schema.get("classes") or []:
        if cls.get("class") == collection:
            print(" ✓ Schema leitud")
            return cls
    return None


def get_objects(url: str, collection: str):
    """Export all objects of *collection*, including their vectors.

    Pages through ``GET /v1/objects`` with the ``after`` cursor instead of
    ``offset`` and requests ``include=vector`` so that each object carries
    its stored vector.

    Returns:
        The list of exported objects. On a request error the loop stops and
        whatever was fetched so far is returned (best effort), so the list
        may be incomplete.
    """
    print(f"\n📤 Eksporditakse {collection}...")

    objects = []
    limit = 100
    cursor = None  # id of the last object of the previous page

    with requests.Session() as session:
        while True:
            params = {"class": collection, "limit": limit, "include": "vector"}
            if cursor is not None:
                params["after"] = cursor

            try:
                resp = session.get(f"{url}/v1/objects", params=params, timeout=30)
            except requests.RequestException as e:
                print(f" ❌ Viga: {e}")
                break

            if resp.status_code != 200:
                print(f" ⚠️ Viga: {resp.status_code}")
                break

            try:
                items = resp.json().get("objects") or []
            except ValueError as e:
                print(f" ❌ Viga: {e}")
                break

            if not items:
                break

            next_cursor = items[-1].get("id")
            if cursor is not None and next_cursor == cursor:
                # The server returned the same page again, i.e. it did not
                # honour `after`; stop rather than loop forever.
                print(" ❌ Viga: server ignoreeris 'after' kursorit")
                break

            objects.extend(items)
            print(f" ✓ Tõmmatud: {len(objects)} objekti...")

            if len(items) < limit or next_cursor is None:
                break
            cursor = next_cursor

    print(f"✅ Eksportitud kokku: {len(objects)} objekti\n")
    return objects


def create_collection_local(url: str, schema: dict):
    """Create the collection described by *schema* on the instance at *url*.

    If a class with the same name already exists, nothing is created.
    Otherwise the schema is passed through ``clean_schema`` and posted.

    Returns:
        ``True`` if the collection exists or was created, ``False`` otherwise.
    """
    collection_name = schema["class"]
    print(f"📝 Kontrollitakse lokaalne kolleektsioon '{collection_name}'...")

    try:
        # Check whether the collection already exists.
        resp = requests.get(f"{url}/v1/schema", timeout=10)
        resp.raise_for_status()
        existing = resp.json()

        for cls in existing.get("classes") or []:
            if cls.get("class") == collection_name:
                print(f" ✓ Kolleektsioon '{collection_name}' juba eksisteerib\n")
                return True

        # Strip vectorizer/module settings before creating the class.
        clean = clean_schema(schema)

        print(" 📝 Luuakse uus kolleektsioon (ilma vectorizer'ita)...")
        resp = requests.post(f"{url}/v1/schema", json=clean, timeout=10)
    except (requests.RequestException, ValueError) as e:
        print(f" ❌ Viga: {e}\n")
        return False

    if resp.status_code in (200, 201):
        print(f" ✓ Kolleektsioon '{collection_name}' loodud\n")
        return True

    print(f" ❌ Viga: {resp.status_code} - {resp.text[:200]}\n")
    return False


def import_objects_rest(url: str, collection: str, objects: list):
    """Import *objects* into *collection* one by one via ``POST /v1/objects``.

    Each object keeps its original id and, when present, its vector. Only
    the first five failures are printed.

    Returns:
        The number of objects that were imported successfully.
    """
    total = len(objects)
    print(f"📥 Importitakse {total} objekti...")

    success = 0
    errors = 0

    # One session for the whole loop so the HTTP connection is reused.
    with requests.Session() as session:
        for i, obj in enumerate(objects, 1):
            payload = {
                "class": collection,
                "id": obj.get("id"),
                "properties": obj.get("properties", {}),
            }
            vector = obj.get("vector")
            if vector:
                payload["vector"] = vector

            try:
                resp = session.post(f"{url}/v1/objects", json=payload, timeout=10)
            except requests.RequestException as e:
                errors += 1
                if errors <= 5:
                    print(f" ⚠️ Viga {i}: {e}")
                continue

            if resp.status_code in (200, 201):
                success += 1
            else:
                errors += 1
                if errors <= 5:
                    print(f" ⚠️ Objekt {i}: {resp.status_code}")

            if i % 50 == 0:
                print(f" ✓ Importitud {i}/{total}...")

    print(f"\n✅ Import valmis: {success} edu, {errors} viga\n")
    return success


def _check_connection(url: str, label: str, trailer: str = ""):
    """Print the version reported by ``/v1/meta``; return whether it worked."""
    try:
        resp = requests.get(f"{url}/v1/meta", timeout=5)
        version = resp.json().get("version", "OK")
    except (requests.RequestException, ValueError) as e:
        print(f" ❌ {label} viga: {e}")
        return False
    print(f" ✓ {label} ({url}): {version}{trailer}")
    return True


def main():
    """Run the migration and return a process exit code.

    Returns:
        ``0`` when every exported object was imported (or the collection is
        empty), ``1`` on a connection/schema failure or when at least one
        object could not be imported.
    """
    print(f"\n{'='*60}")
    print(f"WEAVIATE MIGRATSIOON: {COLLECTION_NAME}")
    print(f"{'='*60}\n")

    print("🔗 Kontrollitakse ühendusi...")
    if not _check_connection(REMOTE_URL, "Remote"):
        return 1
    if not _check_connection(LOCAL_URL, "Lokaalne", trailer="\n"):
        return 1

    schema = get_collection_schema(REMOTE_URL, COLLECTION_NAME)
    if not schema:
        print(f"❌ Kollektsiooni '{COLLECTION_NAME}' remote'l pole\n")
        return 1

    if not create_collection_local(LOCAL_URL, schema):
        print("❌ Lokaalne kolleektsioon ei loodud\n")
        return 1

    objects = get_objects(REMOTE_URL, COLLECTION_NAME)
    if not objects:
        print(f"⚠️ Kollektsioonis '{COLLECTION_NAME}' pole objekte\n")
        return 0

    imported = import_objects_rest(LOCAL_URL, COLLECTION_NAME, objects)
    if imported < len(objects):
        # Do not report success when some objects failed to import.
        print(f"⚠️ Kõiki objekte ei imporditud: {imported}/{len(objects)}\n")
        return 1

    print(f"{'='*60}")
    print("✨ Migratsioon valmis!")
    print(f"{'='*60}\n")

    return 0


if __name__ == "__main__":
    # `exit` is injected by the `site` module and may be absent; raising
    # SystemExit works everywhere.
    raise SystemExit(main())