Bladeren bron

Artiklite näidispäring baasist

Ardo Kubjas 4 maanden geleden
bovenliggende
commit
4f359eba6c
1 gewijzigde bestanden met toevoegingen van 167 en 15 verwijderingen
  1. 167 15
      check_weaviate.py

+ 167 - 15
check_weaviate.py

@@ -1,23 +1,175 @@
 # check_weaviate.py
+import json
+import re
+import weaviate  
 from src.weaviate_client import WeaviateClient
 import sys
+
 sys.path.insert(0, './src')
 
-client = WeaviateClient()
-collection = client.client.collections.get("ScientificArticle")
+def parse_transport_context(context_data):
+    """Parse a stored transport-context value into a flat dict for display.
+
+    Args:
+        context_data: JSON string or dict; a dict may hold an 'analysis' JSON
+            text (optionally in a ```json fence) and a 'relevance_score'.
+
+    Returns:
+        Dict with the four analysis fields, 'relevance_score' and
+        'original_relevance'; a dict with a capped 'raw_analysis' when the
+        analysis is not a JSON object; non-dict input is returned (decoded if JSON).
+    """
+    if isinstance(context_data, str):
+        try:
+            context_data = json.loads(context_data)
+        except json.JSONDecodeError:
+            return context_data  # not JSON: hand the raw string back
+    if not isinstance(context_data, dict):
+        return context_data
+    relevance_score = context_data.get('relevance_score', 'N/A')
+    analysis_data = context_data.get('analysis') or ''
+    analysis_text = str(analysis_data)
+    if isinstance(analysis_data, str):
+        # Drop a surrounding Markdown fence before parsing.
+        analysis_text = re.sub(r'^```json\s*', '', analysis_text.strip(), flags=re.IGNORECASE)
+        analysis_text = re.sub(r'\s*```$', '', analysis_text)
+        try:
+            analysis_data = json.loads(analysis_text.strip())
+        except json.JSONDecodeError:
+            analysis_data = None
+    if not isinstance(analysis_data, dict):
+        # Unparseable or non-object analysis: fall back to a short raw preview.
+        raw = analysis_text[:200] + '...' if len(analysis_text) > 200 else analysis_text
+        return {'raw_analysis': raw, 'relevance_score': relevance_score}
+    keys = ('theoretical_contribution', 'practical_applicability', 'problem_solving', 'limitations')
+    fields = {key: analysis_data.get(key, '') for key in keys}
+    return {**fields, 'relevance_score': relevance_score,
+            'original_relevance': analysis_data.get('relevance_score', 'N/A')}
 
-# Loendi kokku
-count_response = collection.aggregate.over_all(total_count=True)
-total = count_response.total_count
-print(f"\n✅ Weaviate'is on {total} artiklit.")
+def format_context_for_display(parsed_context):
+    """Render a parsed transport context as multi-line display text.
+
+    Args:
+        parsed_context: Dict from parse_transport_context, or any other value.
+
+    Returns:
+        For a dict: a titled section per known field present (text capped at
+        300 characters), then the relevance score line(s). For anything else:
+        str(parsed_context) capped at 500 characters. '...' marks real cuts only.
+    """
+    def shorten(value, limit):
+        # None counts as empty; other non-strings (lists, numbers) are stringified.
+        text = '' if value is None else str(value)
+        return text[:limit] + "..." if len(text) > limit else text
+
+    if not isinstance(parsed_context, dict):
+        return shorten(str(parsed_context), 500)
+    sections = (
+        ('theoretical_contribution', "📚 TEOREETILINE PANUS:"),
+        ('practical_applicability', "🔧 PRAKTILINE RAKENDATAVUS:"),
+        ('problem_solving', "🎯 PROBLEEMILAHDUS:"),
+        ('limitations', "⚠️ PIIRANGUD:"),
+        ('raw_analysis', "📄 PARSIMATA ANALÜÜS:"),  # fallback text used to be dropped
+    )
+    formatted = []
+    for key, title in sections:
+        if key in parsed_context:
+            formatted += [title, f"   {shorten(parsed_context[key], 300)}", ""]
+    formatted.append(f"⭐ RELEVANTSUSE SKOOR: {parsed_context.get('relevance_score', 'N/A')}")
+    if 'original_relevance' in parsed_context:
+        formatted.append(f"   (Algne skoor: {parsed_context['original_relevance']})")
+    return "\n".join(formatted)
 
-# Võta mõni näidis välja
-if total > 0:
-    print("\n📄 Esimesed 3 artiklit:")
-    response = collection.query.fetch_objects(limit=3)
-    for i, obj in enumerate(response.objects):
-        print(f"\n{i+1}. ID: {obj.properties.get('article_id', 'N/A')}")
-        print(f"   Pealkiri: {obj.properties.get('title', 'N/A')}")
-        print(f"   Autorid: {obj.properties.get('authors', ['N/A'])}")
+def _join_values(values):
+    """Join a list-like property for display; 'N/A' when the value is missing."""
+    if values is None:
+        return 'N/A'
+    if isinstance(values, str):
+        return values
+    return ', '.join(str(value) for value in values)
+
+
+def _print_top(collection, sort_property, heading, detail):
+    """Print the titles of the 3 objects ranked highest by sort_property."""
+    response = collection.query.fetch_objects(
+        limit=3,
+        sort=weaviate.classes.query.Sort.by_property(sort_property, ascending=False)
+    )
+    print(heading)
+    for obj in response.objects:
+        print(f"   • {obj.properties.get('title', 'N/A')}")
+        print(f"     ({detail.format(obj.properties.get(sort_property, 'N/A'))})")
+        print()
+
+
+def main():
+    """Print the article count, up to 3 sample articles and an optional ranking.
+
+    Creates a WeaviateClient, reads the "ScientificArticle" collection and
+    always closes the client. Any exception is printed with its traceback
+    instead of propagating.
+    """
+    client = WeaviateClient()
+    try:
+        collection = client.client.collections.get("ScientificArticle")
+        total = collection.aggregate.over_all(total_count=True).total_count
+        print(f"\n✅ Weaviate'is on {total} artiklit.")
+
+        if total > 0:
+            print("\n📄 ESIMESED 3 ARTIKLIT:")
+            print("=" * 80)
+            response = collection.query.fetch_objects(limit=3)
+            for i, obj in enumerate(response.objects):
+                props = obj.properties
+                print(f"\n{i+1}. ID: {props.get('article_id', 'N/A')}")
+                print(f"   📝 Pealkiri: {props.get('title', 'N/A')}")
+                print(f"   👥 Autorid: {_join_values(props.get('authors'))}")
+                print(f"   📅 Aasta: {props.get('year', 'N/A')}")
+                print(f"   📖 Ajakiri: {props.get('journal', 'N/A')}")
+                doi = props.get('doi', '')
+                if doi:
+                    print(f"   🔗 DOI: https://doi.org/{doi}")
+
+                context = props.get('transport_context', '')
+                if context:
+                    print("\n   🚗 TRANSPORDI KONTEKST:")
+                    print("   " + "-" * 50)
+                    parsed_context = parse_transport_context(context)
+                    # Indent every line so the block lines up under the article.
+                    for line in format_context_for_display(parsed_context).split('\n'):
+                        print(f"   {line}")
+
+                print(f"\n   🏷️ Võtmesõnad: {_join_values(props.get('key_concepts'))}")
+                print(f"   📊 Meetodid: {_join_values(props.get('methods_used'))}")
+                if i < len(response.objects) - 1:
+                    print("\n" + "-" * 80)
+
+        print("\n📊 STATISTIKA:")
+        print(f"   • Artikleid kokku: {total}")
+
+        if total > 3:
+            print("\n🔍 SOOVIKSID VAADATA:")
+            print("   1. Viimaseid lisatud artikleid")
+            print("   2. Kõrgeima relevantsusega artikleid")
+            print("   3. Kindla autoriga artikleid")
+            print("   4. Kindla aastaga artikleid")
+            try:
+                valik = input("\nVali number (vajuta Enter jätkamiseks): ").strip()
+            except EOFError:
+                valik = ""  # no interactive stdin: behave as if Enter was pressed
+            # NOTE: choices 3 and 4 are listed above but have no handler yet.
+            if valik == "1":
+                _print_top(collection, "processing_date", "\n⏰ VIIMASED 3 ARTIKLIT:", "Lisatud: {}")
+            elif valik == "2":
+                _print_top(collection, "relevance_score",
+                           "\n⭐ KÕRGEIMA RELEVANTSUSEGA ARTIKLID:", "Relevantsus: {}/10")
+    except Exception as e:
+        print(f"\n❌ VIGA: {e}")
+        import traceback
+        traceback.print_exc()
+    finally:
+        client.close()
+        print("\n👋 Ühendus Weaviate'iga suletud.")
 
-client.close()
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()