# check_weaviate.py (7.3 KB)
  1. # check_weaviate.py
  2. import json
  3. import re
  4. import weaviate
  5. from src.weaviate_client import WeaviateClient
  6. import sys
  7. sys.path.insert(0, './src')
  8. def parse_transport_context(context_data):
  9. """Parsi transpordikonteksti JSON-ist loetavaks"""
  10. if isinstance(context_data, str):
  11. # Proovi parsida string JSON-iks
  12. try:
  13. context_data = json.loads(context_data)
  14. except json.JSONDecodeError:
  15. # Kui ei õnnestu, tagasta algne string
  16. return context_data
  17. if isinstance(context_data, dict):
  18. # Kontrolli, kas on "analysis" väli
  19. analysis_text = context_data.get('analysis', '')
  20. relevance_score = context_data.get('relevance_score', 'N/A')
  21. # Eemalda markdowni koodiblokid (```json ja ```)
  22. analysis_text = re.sub(r'^```json\s*', '', analysis_text, flags=re.IGNORECASE)
  23. analysis_text = re.sub(r'\s*```$', '', analysis_text)
  24. # Proovi parsida analysis JSON-iks
  25. try:
  26. analysis_data = json.loads(analysis_text.strip())
  27. return {
  28. 'theoretical_contribution': analysis_data.get('theoretical_contribution', ''),
  29. 'practical_applicability': analysis_data.get('practical_applicability', ''),
  30. 'problem_solving': analysis_data.get('problem_solving', ''),
  31. 'limitations': analysis_data.get('limitations', ''),
  32. 'relevance_score': relevance_score,
  33. 'original_relevance': analysis_data.get('relevance_score', 'N/A')
  34. }
  35. except json.JSONDecodeError:
  36. # Kui ei õnnestu parsida, tagasta algne tekst
  37. return {
  38. 'raw_analysis': analysis_text[:200] + '...' if len(analysis_text) > 200 else analysis_text,
  39. 'relevance_score': relevance_score
  40. }
  41. return context_data
  42. def format_context_for_display(parsed_context):
  43. """Vorminda parsitud kontekst ilusaks kuvamiseks"""
  44. if isinstance(parsed_context, dict):
  45. formatted = []
  46. if 'theoretical_contribution' in parsed_context:
  47. formatted.append("📚 TEOREETILINE PANUS:")
  48. formatted.append(f" {parsed_context['theoretical_contribution'][:300]}...")
  49. formatted.append("")
  50. if 'practical_applicability' in parsed_context:
  51. formatted.append("🔧 PRAKTILINE RAKENDATAVUS:")
  52. formatted.append(f" {parsed_context['practical_applicability'][:300]}...")
  53. formatted.append("")
  54. if 'problem_solving' in parsed_context:
  55. formatted.append("🎯 PROBLEEMILAHDUS:")
  56. formatted.append(f" {parsed_context['problem_solving'][:300]}...")
  57. formatted.append("")
  58. if 'limitations' in parsed_context:
  59. formatted.append("⚠️ PIIRANGUD:")
  60. formatted.append(f" {parsed_context['limitations'][:300]}...")
  61. formatted.append("")
  62. formatted.append(f"⭐ RELEVANTSUSE SKOOR: {parsed_context.get('relevance_score', 'N/A')}")
  63. if 'original_relevance' in parsed_context:
  64. formatted.append(f" (Algne skoor: {parsed_context['original_relevance']})")
  65. return "\n".join(formatted)
  66. else:
  67. return str(parsed_context)[:500] + "..." if len(str(parsed_context)) > 500 else str(parsed_context)
  68. def main():
  69. client = WeaviateClient()
  70. try:
  71. collection = client.client.collections.get("ScientificArticle")
  72. # Loendi kokku
  73. count_response = collection.aggregate.over_all(total_count=True)
  74. total = count_response.total_count
  75. print(f"\n✅ Weaviate'is on {total} artiklit.")
  76. # Võta mõni näidis välja
  77. if total > 0:
  78. print("\n📄 ESIMESED 3 ARTIKLIT:")
  79. print("=" * 80)
  80. response = collection.query.fetch_objects(limit=3)
  81. for i, obj in enumerate(response.objects):
  82. print(f"\n{i+1}. ID: {obj.properties.get('article_id', 'N/A')}")
  83. print(f" 📝 Pealkiri: {obj.properties.get('title', 'N/A')}")
  84. print(f" 👥 Autorid: {', '.join(obj.properties.get('authors', ['N/A']))}")
  85. print(f" 📅 Aasta: {obj.properties.get('year', 'N/A')}")
  86. print(f" 📖 Ajakiri: {obj.properties.get('journal', 'N/A')}")
  87. doi = obj.properties.get('doi', '')
  88. if doi:
  89. print(f" 🔗 DOI: https://doi.org/{doi}")
  90. # Töötle konteksti
  91. context = obj.properties.get('transport_context', '')
  92. if context:
  93. print("\n 🚗 TRANSPORDI KONTEKST:")
  94. print(" " + "-" * 50)
  95. parsed_context = parse_transport_context(context)
  96. formatted_context = format_context_for_display(parsed_context)
  97. # Prindi rea kaupa, et oleks loetavam
  98. for line in formatted_context.split('\n'):
  99. print(f" {line}")
  100. print(f"\n 🏷️ Võtmesõnad: {', '.join(obj.properties.get('key_concepts', ['N/A']))}")
  101. print(f" 📊 Meetodid: {', '.join(obj.properties.get('methods_used', ['N/A']))}")
  102. if i < len(response.objects) - 1:
  103. print("\n" + "-" * 80)
  104. # Näita lisastatistikat
  105. print(f"\n📊 STATISTIKA:")
  106. print(f" • Artikleid kokku: {total}")
  107. # Kui on rohkem kui 3 artiklit, näita lisavalikuid
  108. if total > 3:
  109. print(f"\n🔍 SOOVIKSID VAADATA:")
  110. print(" 1. Viimaseid lisatud artikleid")
  111. print(" 2. Kõrgeima relevantsusega artikleid")
  112. print(" 3. Kindla autoriga artikleid")
  113. print(" 4. Kindla aastaga artikleid")
  114. valik = input("\nVali number (vajuta Enter jätkamiseks): ")
  115. if valik == "1":
  116. response = collection.query.fetch_objects(
  117. limit=3,
  118. sort=weaviate.classes.query.Sort.by_property("processing_date", ascending=False)
  119. )
  120. print("\n⏰ VIIMASED 3 ARTIKLIT:")
  121. for obj in response.objects:
  122. print(f" • {obj.properties.get('title', 'N/A')}")
  123. print(f" (Lisatud: {obj.properties.get('processing_date', 'N/A')})")
  124. print()
  125. elif valik == "2":
  126. response = collection.query.fetch_objects(
  127. limit=3,
  128. sort=weaviate.classes.query.Sort.by_property("relevance_score", ascending=False)
  129. )
  130. print("\n⭐ KÕRGEIMA RELEVANTSUSEGA ARTIKLID:")
  131. for obj in response.objects:
  132. print(f" • {obj.properties.get('title', 'N/A')}")
  133. print(f" (Relevantsus: {obj.properties.get('relevance_score', 'N/A')}/10)")
  134. print()
  135. except Exception as e:
  136. print(f"\n❌ VIGA: {e}")
  137. import traceback
  138. traceback.print_exc()
  139. finally:
  140. client.close()
  141. print("\n👋 Ühendus Weaviate'iga suletud.")
  142. if __name__ == "__main__":
  143. main()