| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
- #!/usr/bin/env python3
- """
- Weaviate migratsioon REST API abil - ainult ScientificArticle
- Eemalda vectorizer konfig, et kohalik Weaviate saaks skeem luua
- """
- import requests
- import json
- import copy
# Base URL of the source Weaviate instance the collection is exported from.
REMOTE_URL = "http://hetzner:9020"
# Base URL of the target Weaviate instance the collection is imported into.
LOCAL_URL = "http://localhost:8080"
# Name of the single collection (class) this script migrates.
COLLECTION_NAME = "ScientificArticle"
def clean_schema(schema: dict) -> dict:
    """Return a copy of a class schema with vectorizer/module settings removed.

    The input is never mutated; a deep copy is cleaned and returned so the
    result can be posted to a Weaviate instance that does not have the
    source instance's vectorizer modules enabled.

    Args:
        schema: Class definition dict as returned by ``GET /v1/schema``.

    Returns:
        A new dict without vectorizer, vector-index and module configuration
        at class level and without vectorizer/module configuration on any
        property. Unless the class declares ``vectorConfig``, ``vectorizer``
        is set to ``"none"`` so the target does not fall back to its default
        vectorizer module.
    """
    class_keys = (
        "vectorizer",
        "vectorizerConfig",
        "vectorIndexConfig",
        "vector_config",
        "vectorIndexType",
        # Module references (e.g. text2vec-*) live under moduleConfig.
        "moduleConfig",
    )
    property_keys = (
        "vectorizer",
        "vectorizerConfig",
        "vectorizer_configs",
        "moduleConfig",
    )

    cleaned = copy.deepcopy(schema)

    for key in class_keys:
        cleaned.pop(key, None)

    # Explicitly disable vectorization; merely omitting the key lets the
    # server apply its configured default vectorizer.
    # NOTE(review): skipped when vectorConfig is present because the legacy
    # class-level vectorizer may not be accepted alongside it - confirm.
    if not cleaned.get("vectorConfig"):
        cleaned["vectorizer"] = "none"

    # "properties" may be missing or explicitly null.
    for prop in cleaned.get("properties") or []:
        for key in property_keys:
            prop.pop(key, None)

    return cleaned
def get_collection_schema(url: str, collection: str):
    """Fetch the schema definition of one collection from a Weaviate instance.

    Args:
        url: Base URL of the Weaviate instance (no trailing slash).
        collection: Class name to look up.

    Returns:
        The class definition dict whose ``"class"`` equals ``collection``,
        or ``None`` when the class is absent or the request failed (the
        failure is printed, not raised).
    """
    print(f"📋 Schema: {collection}...")
    try:
        resp = requests.get(f"{url}/v1/schema", timeout=10)
        # Surface HTTP errors instead of reporting them as "class not found".
        resp.raise_for_status()
        schema = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f" ❌ Viga: {e}")
        return None

    for cls in schema.get("classes") or []:
        if cls.get("class") == collection:
            print(" ✓ Schema leitud")
            return cls
    return None
def get_objects(url: str, collection: str):
    """Export all objects of a collection from a Weaviate instance over REST.

    Pages through ``GET /v1/objects`` with ``limit``/``offset`` and asks for
    ``include=vector`` so every returned object carries its embedding.
    Without that parameter the list endpoint omits vectors, and importing
    the result would create objects with no vector at all.

    The export is best-effort: a non-200 response or a request/JSON error
    stops paging, and whatever was fetched up to that point is returned.

    NOTE(review): offset paging is presumably capped by the server's
    maximum-results setting; very large collections may need cursor paging
    (``after``) instead - confirm against the source instance.

    Args:
        url: Base URL of the Weaviate instance (no trailing slash).
        collection: Class name whose objects are exported.

    Returns:
        List of object dicts as returned by the server (possibly empty).
    """
    print(f"\n📤 Eksporditakse {collection}...")

    objects = []
    offset = 0
    limit = 100

    while True:
        try:
            resp = requests.get(
                f"{url}/v1/objects",
                params={
                    "class": collection,
                    "limit": limit,
                    "offset": offset,
                    # Vectors are only returned when explicitly requested.
                    "include": "vector",
                },
                timeout=30,
            )

            if resp.status_code != 200:
                print(f" ⚠️ Viga: {resp.status_code}")
                break

            items = resp.json().get("objects") or []
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f" ❌ Viga: {e}")
            break

        if not items:
            break

        objects.extend(items)
        offset += len(items)
        print(f" ✓ Tõmmatud: {offset} objekti...")

        # A short page means the server has no more objects.
        if len(items) < limit:
            break

    print(f"✅ Eksportitud kokku: {len(objects)} objekti\n")
    return objects
def create_collection_local(url: str, schema: dict):
    """Ensure the collection described by ``schema`` exists on the target.

    If a class with the same name already exists it is left untouched.
    Otherwise the schema is passed through ``clean_schema`` and posted to
    ``/v1/schema``.

    Args:
        url: Base URL of the target Weaviate instance (no trailing slash).
        schema: Class definition dict; must contain the ``"class"`` key.

    Returns:
        ``True`` when the collection already exists or was created,
        ``False`` when the lookup or the creation failed (the failure is
        printed, not raised).

    Raises:
        KeyError: If ``schema`` has no ``"class"`` key.
    """
    collection_name = schema["class"]
    print(f"📝 Kontrollitakse lokaalne kolleektsioon '{collection_name}'...")

    try:
        # Check whether the collection already exists.
        resp = requests.get(f"{url}/v1/schema", timeout=10)
        # An error response must not be mistaken for "collection absent".
        resp.raise_for_status()
        existing = resp.json()

        for cls in existing.get("classes") or []:
            if cls.get("class") == collection_name:
                print(f" ✓ Kolleektsioon '{collection_name}' juba eksisteerib\n")
                return True

        print(" 📝 Luuakse uus kolleektsioon (ilma vectorizer'ita)...")
        resp = requests.post(
            f"{url}/v1/schema",
            # Strip vectorizer settings the target may not support.
            json=clean_schema(schema),
            timeout=10,
        )
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f" ❌ Viga: {e}\n")
        return False

    if resp.status_code in (200, 201):
        print(f" ✓ Kolleektsioon '{collection_name}' loodud\n")
        return True

    print(f" ❌ Viga: {resp.status_code} - {resp.text[:200]}\n")
    return False
def import_objects_rest(url: str, collection: str, objects: list):
    """Import exported objects into a Weaviate instance, one POST per object.

    Each object is re-created under ``collection`` with its original id and
    properties; its vector is sent along when the exported object has one.
    A single ``requests.Session`` is shared by all requests so the HTTP
    connection is reused instead of being reopened for every object.

    Failures are counted rather than raised, and only the first five are
    printed to keep the output readable.

    Args:
        url: Base URL of the target Weaviate instance (no trailing slash).
        collection: Class name the objects are created under.
        objects: Object dicts as returned by ``GET /v1/objects``.

    Returns:
        Number of objects the server accepted (HTTP 200 or 201).
    """
    total = len(objects)
    print(f"📥 Importitakse {total} objekti...")

    success = 0
    errors = 0

    with requests.Session() as session:
        for i, obj in enumerate(objects, 1):
            try:
                payload = {
                    "class": collection,
                    "id": obj.get("id"),
                    "properties": obj.get("properties", {}),
                }

                vector = obj.get("vector")
                if vector:
                    payload["vector"] = vector

                resp = session.post(
                    f"{url}/v1/objects",
                    json=payload,
                    timeout=10,
                )

                if resp.status_code in (200, 201):
                    success += 1
                else:
                    errors += 1
                    if errors <= 5:
                        print(f" ⚠️ Objekt {i}: {resp.status_code}")

                if i % 50 == 0:
                    print(f" ✓ Importitud {i}/{total}...")

            except Exception as e:
                # Per-object isolation: one malformed object or failed
                # request must not abort the rest of the import.
                errors += 1
                if errors <= 5:
                    print(f" ⚠️ Viga {i}: {e}")

    print(f"\n✅ Import valmis: {success} edu, {errors} viga\n")
    return success
def main():
    """Run the migration of ``COLLECTION_NAME`` from remote to local Weaviate.

    Steps: check that both instances answer ``/v1/meta``, fetch the remote
    schema, make sure the collection exists locally, export all remote
    objects and import them locally.

    Returns:
        Process exit code: ``0`` when the migration finished (or there was
        nothing to migrate), ``1`` when a step failed or not every exported
        object could be imported.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"WEAVIATE MIGRATSIOON: {COLLECTION_NAME}")
    print(f"{banner}\n")

    print("🔗 Kontrollitakse ühendusi...")

    # (label used in messages, base URL, trailing text after the version)
    endpoints = (
        ("Remote", REMOTE_URL, ""),
        ("Lokaalne", LOCAL_URL, "\n"),
    )
    for label, base_url, tail in endpoints:
        try:
            resp = requests.get(f"{base_url}/v1/meta", timeout=5)
            version = resp.json().get("version", "OK")
        except Exception as e:
            # Top-level boundary: report any failure and stop.
            print(f" ❌ {label} viga: {e}")
            return 1
        print(f" ✓ {label} ({base_url}): {version}{tail}")

    schema = get_collection_schema(REMOTE_URL, COLLECTION_NAME)
    if not schema:
        print(f"❌ Kollektsiooni '{COLLECTION_NAME}' remote'l pole\n")
        return 1

    if not create_collection_local(LOCAL_URL, schema):
        print("❌ Lokaalne kolleektsioon ei loodud\n")
        return 1

    objects = get_objects(REMOTE_URL, COLLECTION_NAME)

    if not objects:
        print(f"⚠️ Kollektsioonis '{COLLECTION_NAME}' pole objekte\n")
        return 0

    imported = import_objects_rest(LOCAL_URL, COLLECTION_NAME, objects)

    # Do not claim success (or exit 0) when objects were rejected.
    if imported != len(objects):
        print(
            f"❌ Import ebaõnnestus osaliselt: "
            f"{imported}/{len(objects)} objekti imporditud\n"
        )
        return 1

    print(banner)
    print("✨ Migratsioon valmis!")
    print(f"{banner}\n")
    return 0
if __name__ == "__main__":
    # exit() is a helper injected by the site module for interactive use and
    # is not guaranteed to exist; SystemExit carries main()'s return value
    # as the process exit code without relying on it.
    raise SystemExit(main())
|