| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- # check_weaviate.py
- import json
- import re
- import weaviate
- from src.weaviate_client import WeaviateClient
- import sys
- sys.path.insert(0, './src')
def parse_transport_context(context_data):
    """Parse a stored transport-context value into a flat dict for display.

    Args:
        context_data: A JSON string or an already-decoded value. When it is
            (or decodes to) a dict, its ``analysis`` entry is read as JSON
            text, optionally wrapped in a Markdown code fence, or as an
            already-decoded dict; ``relevance_score`` is read alongside it.

    Returns:
        * The input unchanged when it is a string that is not valid JSON,
          or the decoded value when it is not a dict.
        * A dict with ``theoretical_contribution``,
          ``practical_applicability``, ``problem_solving``, ``limitations``,
          ``relevance_score`` (outer score) and ``original_relevance``
          (score found inside the analysis) when the analysis decodes to a
          JSON object.
        * A dict with ``raw_analysis`` (at most 200 characters plus
          ``...``) and ``relevance_score`` when the analysis cannot be
          decoded to a JSON object.
    """
    if isinstance(context_data, str):
        try:
            context_data = json.loads(context_data)
        except json.JSONDecodeError:
            # Not JSON at all: hand the original text back to the caller.
            return context_data

    if not isinstance(context_data, dict):
        return context_data

    relevance_score = context_data.get('relevance_score', 'N/A')
    analysis = context_data.get('analysis', '')

    if isinstance(analysis, dict):
        # Already decoded; nothing to strip or parse.
        analysis_text = ''
        analysis_data = analysis
    else:
        if analysis is None:
            analysis_text = ''
        elif isinstance(analysis, str):
            analysis_text = analysis
        else:
            analysis_text = str(analysis)

        # Strip surrounding whitespace first so the anchored patterns below
        # still match when the fence is preceded or followed by newlines.
        analysis_text = analysis_text.strip()
        # Remove a Markdown code fence, with or without the "json" tag.
        analysis_text = re.sub(
            r'^```(?:json)?\s*', '', analysis_text, flags=re.IGNORECASE
        )
        analysis_text = re.sub(r'\s*```$', '', analysis_text)

        try:
            analysis_data = json.loads(analysis_text)
        except json.JSONDecodeError:
            analysis_data = None

    if not isinstance(analysis_data, dict):
        # Undecodable, or valid JSON that is not an object (list, number,
        # ...): fall back to a shortened copy of the raw text.
        if len(analysis_text) > 200:
            raw_analysis = analysis_text[:200] + '...'
        else:
            raw_analysis = analysis_text
        return {
            'raw_analysis': raw_analysis,
            'relevance_score': relevance_score,
        }

    return {
        'theoretical_contribution': analysis_data.get('theoretical_contribution', ''),
        'practical_applicability': analysis_data.get('practical_applicability', ''),
        'problem_solving': analysis_data.get('problem_solving', ''),
        'limitations': analysis_data.get('limitations', ''),
        'relevance_score': relevance_score,
        'original_relevance': analysis_data.get('relevance_score', 'N/A'),
    }
def _shorten(value, limit):
    """Return ``value`` as text, cut to ``limit`` characters plus ``...``.

    ``None`` becomes an empty string and other non-string values are
    converted with ``str``. The ellipsis is added only when text was
    actually removed.
    """
    if value is None:
        text = ''
    elif isinstance(value, str):
        text = value
    else:
        text = str(value)
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def format_context_for_display(parsed_context):
    """Format a parsed transport context as multi-line text for printing.

    Args:
        parsed_context: A dict whose recognised keys are
            ``theoretical_contribution``, ``practical_applicability``,
            ``problem_solving``, ``limitations``, ``raw_analysis``,
            ``relevance_score`` and ``original_relevance``; or any other
            value.

    Returns:
        For a dict: one heading line, one body line (at most 300
        characters, with ``...`` when shortened) and one blank line per
        section present, followed by the relevance score line and, when
        ``original_relevance`` is present, the original score line.
        For any other value: its ``str`` form, cut to 500 characters plus
        ``...`` when longer.
    """
    if not isinstance(parsed_context, dict):
        return _shorten(parsed_context, 500)

    # (key, heading) pairs in display order. ``raw_analysis`` is listed so
    # that a context carrying only unparsed text is not reduced to a bare
    # score line.
    sections = (
        ('theoretical_contribution', "📚 TEOREETILINE PANUS:"),
        ('practical_applicability', "🔧 PRAKTILINE RAKENDATAVUS:"),
        ('problem_solving', "🎯 PROBLEEMILAHDUS:"),
        ('limitations', "⚠️ PIIRANGUD:"),
        ('raw_analysis', "📄 TÖÖTLEMATA ANALÜÜS:"),
    )

    formatted = []
    for key, heading in sections:
        if key not in parsed_context:
            continue
        formatted.append(heading)
        formatted.append(f" {_shorten(parsed_context[key], 300)}")
        formatted.append("")

    formatted.append(
        f"⭐ RELEVANTSUSE SKOOR: {parsed_context.get('relevance_score', 'N/A')}"
    )
    if 'original_relevance' in parsed_context:
        formatted.append(f" (Algne skoor: {parsed_context['original_relevance']})")

    return "\n".join(formatted)
def _join_values(values, placeholder='N/A'):
    """Join a list-valued property into a comma-separated string.

    Returns ``placeholder`` when the value is missing, ``None`` or empty,
    returns a plain string unchanged, and converts every item with ``str``
    so that non-string items do not break the join.
    """
    if not values:
        return placeholder
    if isinstance(values, str):
        return values
    return ', '.join(str(item) for item in values)


def _print_article(position, props):
    """Print one article's properties, including its transport context."""
    print(f"\n{position}. ID: {props.get('article_id', 'N/A')}")
    print(f" 📝 Pealkiri: {props.get('title', 'N/A')}")
    print(f" 👥 Autorid: {_join_values(props.get('authors'))}")
    print(f" 📅 Aasta: {props.get('year', 'N/A')}")
    print(f" 📖 Ajakiri: {props.get('journal', 'N/A')}")

    doi = props.get('doi', '')
    if doi:
        print(f" 🔗 DOI: https://doi.org/{doi}")

    context = props.get('transport_context', '')
    if context:
        print("\n 🚗 TRANSPORDI KONTEKST:")
        print(" " + "-" * 50)

        parsed_context = parse_transport_context(context)
        formatted_context = format_context_for_display(parsed_context)

        # Print line by line so every line gets the same leading offset.
        for line in formatted_context.split('\n'):
            print(f" {line}")

    print(f"\n 🏷️ Võtmesõnad: {_join_values(props.get('key_concepts'))}")
    print(f" 📊 Meetodid: {_join_values(props.get('methods_used'))}")


def _print_top_articles(collection, sort_property, heading, detail_label,
                        detail_suffix=''):
    """Fetch three objects sorted descending by ``sort_property`` and print
    each title together with the value of that property."""
    response = collection.query.fetch_objects(
        limit=3,
        sort=weaviate.classes.query.Sort.by_property(sort_property, ascending=False),
    )
    print(heading)
    for obj in response.objects:
        print(f" • {obj.properties.get('title', 'N/A')}")
        print(
            f" ({detail_label}: "
            f"{obj.properties.get(sort_property, 'N/A')}{detail_suffix})"
        )
        print()


def main():
    """Print an overview of the ``ScientificArticle`` collection.

    Prints the total object count and the first three objects. When there
    are more than three objects, it shows a menu and, on request, prints the
    three objects with the highest ``processing_date`` or
    ``relevance_score``. Any exception is printed with its traceback, and
    the client is closed in all cases.
    """
    client = WeaviateClient()

    try:
        collection = client.client.collections.get("ScientificArticle")

        count_response = collection.aggregate.over_all(total_count=True)
        total = count_response.total_count
        print(f"\n✅ Weaviate'is on {total} artiklit.")

        # NOTE(review): the source's indentation was lost; the statistics
        # and menu are assumed to sit inside the "total > 0" branch -- confirm.
        if total > 0:
            print("\n📄 ESIMESED 3 ARTIKLIT:")
            print("=" * 80)
            response = collection.query.fetch_objects(limit=3)
            articles = response.objects

            for position, obj in enumerate(articles, start=1):
                _print_article(position, obj.properties)
                # Separator between articles, not after the last one.
                if position < len(articles):
                    print("\n" + "-" * 80)

            print("\n📊 STATISTIKA:")
            print(f" • Artikleid kokku: {total}")

            if total > 3:
                print("\n🔍 SOOVIKSID VAADATA:")
                print(" 1. Viimaseid lisatud artikleid")
                print(" 2. Kõrgeima relevantsusega artikleid")
                print(" 3. Kindla autoriga artikleid")
                print(" 4. Kindla aastaga artikleid")

                valik = input("\nVali number (vajuta Enter jätkamiseks): ").strip()

                if valik == "1":
                    _print_top_articles(
                        collection,
                        "processing_date",
                        "\n⏰ VIIMASED 3 ARTIKLIT:",
                        "Lisatud",
                    )
                elif valik == "2":
                    _print_top_articles(
                        collection,
                        "relevance_score",
                        "\n⭐ KÕRGEIMA RELEVANTSUSEGA ARTIKLID:",
                        "Relevantsus",
                        "/10",
                    )
                elif valik in ("3", "4"):
                    # These menu entries have no implementation yet; say so
                    # instead of silently doing nothing.
                    print("\nℹ️ See valik ei ole veel saadaval.")

    except Exception as e:
        # Top-level boundary of the script: report the failure and still
        # run the cleanup below.
        print(f"\n❌ VIGA: {e}")
        import traceback
        traceback.print_exc()
    finally:
        client.close()
        print("\n👋 Ühendus Weaviate'iga suletud.")
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|