| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309 |
- #!/usr/bin/env python3
- """
- Päringukliendi näide Weaviate'i baasile
- """
- import sys
- import os
- import json
- from datetime import datetime
- import re
- # Lisa src kaust Pythoni teele
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
- from src.query_engine import QueryEngine
def print_result(result: dict):
    """Pretty-print a single query result to stdout.

    Prints a header (query, search type, number of articles found and
    confidence), the answer text, a numbered list of sources and finally the
    result timestamp.

    Args:
        result: Result dictionary. The keys read are ``query``,
            ``search_type``, ``articles_found``, ``confidence``, ``answer``,
            ``sources`` and ``timestamp``. Every source is a dict from which
            ``title``, ``authors``, ``year``, ``journal``, ``relevance_score``,
            ``score`` and ``summary`` are read. Missing keys, and ``None``
            values for the numeric fields, ``authors`` and ``sources``, fall
            back to defaults instead of raising.
    """
    def _number(value, default=0.0):
        # The ".2%" / ".3f" format specs raise on None or non-numeric text,
        # so coerce first and fall back to the default.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    banner = "=" * 80

    print("\n" + banner)
    print(f"PÄRING: {result.get('query', '')}")
    print(f"TÜÜP: {result.get('search_type', '')} | LEITI: {result.get('articles_found', 0)} artiklit")
    print(f"USALDUSVÄÄRSUS: {_number(result.get('confidence')):.2%}")
    print(banner)

    # Answer
    print("\n📝 VASTUS:")
    print(result.get('answer', ''))

    # Sources
    sources = result.get('sources') or []
    if sources:
        print(f"\n📚 ALLIKAD ({len(sources)}):")
        for i, source in enumerate(sources, 1):
            authors = source.get('authors') or []
            if isinstance(authors, str):
                # A bare string would otherwise be joined character by character.
                authors = [authors]

            print(f"\n{i}. {source.get('title', '')}")
            print(f" Autorid: {', '.join(str(author) for author in authors)}")
            print(f" Aasta: {source.get('year', '')} | Žurnaal: {source.get('journal', '')}")
            print(f" Relevantsus: {source.get('relevance_score', 5)}/10")
            print(f" Skoor: {_number(source.get('score')):.3f}")
            if source.get('summary'):
                print(f" Kokkuvõte: {source.get('summary')}")

    print("\n" + banner)
    print(f"Kuupäev: {result.get('timestamp', '')}")
def interactive_mode():
    """Run the interactive command prompt.

    Prints the command overview, creates a ``QueryEngine`` and then reads
    lines from stdin until the user quits. Each line has the form
    ``<command> <query>``; the one-letter command selects the answer style
    (``context_type``) and the search strategy (``search_type``) passed to
    ``engine.ask``. The result is shown with ``print_result``.

    The loop ends on ``q``/``quit``/``exit``, on Ctrl+C, or when stdin reaches
    end-of-file. Any other exception raised while handling a line is printed
    and the loop continues. The engine is closed on every exit path.
    """
    # command letter -> (context_type, search_type)
    commands = {
        't': ("technical", "hybrid"),
        'l': ("concise", "hybrid"),
        'd': ("detailed", "hybrid"),
        's': ("detailed", "semantic"),
        'k': ("detailed", "keyword"),
        'h': ("detailed", "hybrid"),
    }

    help_text = "\n".join([
        "",
        "Käsud:",
        "t [päring] - Tehniline vastus (spetsialistidele)",
        "l [päring] - Lühike vastus (kokkuvõte)",
        "d [päring] - Detailne vastus (põhjalik analüüs)",
        "s [päring] - Semantiline otsing (mõttesisu)",
        "k [päring] - Võtmesõnade otsing (tekstipõhine)",
        "h [päring] - Hübriidotsing (mõlemad)",
        "",
        "Näited:",
        "t kuidas parandada teede ohutust",
        "l elektriautod linnaliikluses",
        "d jalgrataste ja autode jagatud teed",
        "",
    ])

    print("🤖 TRANSFORDIARTIKLITE PÄRINGUMOOTOR")
    print("="*60)
    print("Kasuta järgmisi käske:")
    print(" ? - Abi")
    print(" q - Välju")
    print(" t [päring] - Tehniline vastus")
    print(" l [päring] - Lühike vastus")
    print(" d [päring] - Detailne vastus (vaikimisi)")
    print(" s [päring] - Semantiline otsing")
    print(" k [päring] - Võtmesõnade otsing")
    print(" h [päring] - Hübriidotsing (vaikimisi)")
    print("="*60)

    engine = QueryEngine()

    # try/finally so the engine is released even if the loop dies unexpectedly.
    try:
        while True:
            try:
                user_input = input("\n> ").strip()

                if not user_input:
                    continue

                if user_input.lower() in ('q', 'quit', 'exit'):
                    print("Head aega!")
                    break

                if user_input == '?':
                    print(help_text)
                    continue

                # Split on any whitespace run so the query carries no
                # leading blanks when the user types extra spaces or a tab.
                parts = user_input.split(None, 1)
                if len(parts) < 2:
                    print("❌ Vigane käsk. Kasuta: [käsk] [päring]")
                    continue

                command, query = parts[0].lower(), parts[1]

                if command not in commands:
                    print(f"❌ Tundmatu käsk: {command}")
                    continue
                context_type, search_type = commands[command]

                print(f"🔍 Otsin: '{query}'...")
                result = engine.ask(query, search_type=search_type, context_type=context_type)
                print_result(result)

            except (KeyboardInterrupt, EOFError):
                # EOFError is an Exception subclass: without handling it
                # here, a closed stdin would be reported as an ordinary
                # error on every iteration and the loop would never end.
                print("\n\nVäljun...")
                break
            except Exception as e:
                print(f"❌ Viga: {str(e)}")
    finally:
        engine.close()
def batch_mode(queries_file: str):
    """Run every query listed in a file and save the results under ``tmp/``.

    The file is read as UTF-8 with one query per line; blank lines are
    skipped. Each query is sent to a ``QueryEngine`` with default settings.
    Every successful result is written to its own
    ``tmp/query_result_<timestamp>_<n>.json`` file and converted to Markdown.
    At the end all results are written to one
    ``tmp/query_summary_<timestamp>.json`` file, which is converted too.

    A query that fails is recorded in the summary as a dict with ``query``,
    ``error`` and ``timestamp``. A failure while saving or converting an
    individual result is only reported on stdout; the result itself stays in
    the summary exactly once.

    Args:
        queries_file: Path of the text file holding the queries. If it does
            not exist, a message is printed and the function returns.
    """
    try:
        with open(queries_file, 'r', encoding='utf-8') as f:
            queries = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"❌ Faili {queries_file} ei leitud")
        return

    # All output files live under tmp/; create it up front so the first
    # write does not fail on a fresh checkout.
    os.makedirs("tmp", exist_ok=True)

    results = []
    total = len(queries)

    print(f"⏳ Töötlen {total} päringut...")

    engine = QueryEngine()
    try:
        for i, query in enumerate(queries, 1):
            print(f" {i}/{total}: '{query}'")

            try:
                result = engine.ask(query)
            except Exception as e:
                print(f" ✗ Viga: {str(e)}")
                results.append({
                    "query": query,
                    "error": str(e),
                    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                })
                continue

            results.append(result)

            # Save every result to its own file. This is kept apart from the
            # query itself so that an I/O or conversion problem cannot add a
            # second, contradictory entry for the same query.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"tmp/query_result_{timestamp}_{i}.json"
            try:
                with open(filename, 'w', encoding='utf-8') as f:
                    json.dump(result, f, ensure_ascii=False, indent=2)

                print(f" ✓ Salvestatud {filename}")
                # Convert the JSON file to a Markdown file
                convert_json_file_to_markdown(filename)
            except Exception as e:
                print(f" ✗ Viga: {str(e)}")
    finally:
        engine.close()

    # Save all results into one summary file
    summary_file = f"tmp/query_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(summary_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Valmis! Tulemused salvestatud faili: {summary_file}")
    # Convert the JSON summary to a Markdown file
    convert_json_file_to_markdown(summary_file)
def single_query(query: str, search_type: str = "hybrid", context_type: str = "detailed"):
    """Run one query, print the result and save it under ``tmp/``.

    The result is printed with ``print_result``, written as JSON to
    ``tmp/query_<timestamp>.json`` and then converted to Markdown with
    ``convert_json_file_to_markdown``.

    Args:
        query: The question passed to ``QueryEngine.ask``.
        search_type: Forwarded to ``ask`` as ``search_type``.
        context_type: Forwarded to ``ask`` as ``context_type``.
    """
    engine = QueryEngine()
    # Close the engine even when the query or the printing fails.
    try:
        print(f"🔍 Päring: {query}")
        result = engine.ask(query, search_type=search_type, context_type=context_type)

        print_result(result)
    finally:
        engine.close()

    # Save the result
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"tmp/query_{timestamp}.json"

    # The output directory is not guaranteed to exist yet.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"📄 Tulemus salvestatud faili: {filename}")
    # Convert the JSON file to a Markdown file
    convert_json_file_to_markdown(filename)
def json_to_markdown(json_data):
    """Render a list of query results as one Markdown document.

    The document starts with a fixed top-level heading. Every result gets a
    numbered section with its metadata (timestamp, number of articles found,
    confidence), an optional error line, the answer text and a list of
    sources. Each section ends with a horizontal rule.

    Args:
        json_data: Iterable of result dicts. The keys read are ``query``,
            ``timestamp``, ``articles_found``, ``confidence``, ``error``,
            ``answer`` and ``sources``. From every source ``title``,
            ``authors``, ``year``, ``summary`` and ``full_summary`` are read.
            Missing keys and ``None`` values are replaced by placeholders.

    Returns:
        The Markdown text as a single string.
    """
    def _to_float(value):
        # ":.2f" raises on None or non-numeric text; treat those as 0.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    # Collect the pieces and join once instead of growing a string in a loop.
    parts = ["# Transpordiartiklite analüüsi kokkuvõte\n\n"]

    for number, query_result in enumerate(json_data, 1):
        # Heading
        query = query_result.get("query", f"Päring {number}")
        parts.append(f"## {number}. {query}\n\n")

        # Metadata
        timestamp = query_result.get("timestamp", "")
        articles_found = query_result.get("articles_found", 0)
        confidence = _to_float(query_result.get("confidence"))

        parts.append(f"**Aeg:** {timestamp}\n")
        parts.append(f"**Leitud artikleid:** {articles_found}\n")
        parts.append(f"**Usaldusväärsus:** {confidence:.2f}\n\n")

        # Entries for failed queries carry an "error" text instead of an
        # answer; show it rather than rendering an empty section.
        error = query_result.get("error")
        if error:
            parts.append(f"**Viga:** {error}\n\n")

        # Main answer
        answer = query_result.get("answer") or ""
        if answer:
            # Collapse runs of three or more newlines into one blank line.
            answer = re.sub(r'\n{3,}', '\n\n', str(answer).strip())
            parts.append("### Analüüs\n\n")
            parts.append(f"{answer}\n\n")

        # Sources
        sources = query_result.get("sources") or []
        if sources:
            parts.append("### Allikad\n\n")
            for source_number, source in enumerate(sources, 1):
                title = source.get("title", "Pealkiri puudub")

                authors = source.get("authors") or ["Autor puudub"]
                if isinstance(authors, str):
                    # A bare string would be joined character by character.
                    authors = [authors]
                authors = ", ".join(str(author) for author in authors)

                year = source.get("year")
                if year is None or year == "":
                    year = "Aasta puudub"

                # Prefer the short summary, then the full one, then a placeholder.
                summary = source.get("summary") or source.get("full_summary") or "Kokkuvõte puudub"

                # Keep only the first 50 words of the summary as a preview.
                words = str(summary).split()
                summary_preview = " ".join(words[:50])
                if len(words) > 50:
                    summary_preview += "..."

                parts.append(f"#### {source_number}. {title}\n")
                parts.append(f"**Autor(id):** {authors}\n")
                parts.append(f"**Aasta:** {year}\n")
                parts.append(f"**Kokkuvõte:** {summary_preview}\n\n")

        parts.append("---\n\n")

    return "".join(parts)
def convert_json_file_to_markdown(json_filename):
    """Write a Markdown rendering of a JSON result file next to it.

    The JSON file is read as UTF-8. A top-level list is passed to
    ``json_to_markdown`` as is, a top-level dict is wrapped in a one-element
    list first, and any other top-level value produces a short error
    document. The output path is the input path with its extension replaced
    by ``.md``.

    Args:
        json_filename: Path of the JSON file to convert.

    Returns:
        The path of the Markdown file that was written.
    """
    with open(json_filename, 'r', encoding='utf-8') as source_file:
        payload = json.load(source_file)

    # Normalise the payload to the list shape the renderer expects.
    if isinstance(payload, list):
        rendered = json_to_markdown(payload)
    elif isinstance(payload, dict):
        rendered = json_to_markdown([payload])
    else:
        rendered = "# Viga\n\nAndmed pole sobivas vormingus.\n"

    # Same location and stem as the input, with a .md extension.
    stem, _extension = os.path.splitext(json_filename)
    md_filename = f"{stem}.md"

    with open(md_filename, 'w', encoding='utf-8') as target_file:
        target_file.write(rendered)

    print(f"✅ Markdown fail loodud: {md_filename}")
    return md_filename
if __name__ == "__main__":
    # Command-line entry point: pick batch, single-query or interactive mode.
    import argparse

    cli = argparse.ArgumentParser(description="Transpordiartiklite päringumootor")
    cli.add_argument("--query", "-q", help="Üksik päring")
    cli.add_argument("--file", "-f", help="Päringute fail (üks päring real)")
    cli.add_argument(
        "--type", "-t",
        choices=["semantic", "keyword", "hybrid"],
        default="hybrid",
        help="Otsingu tüüp",
    )
    cli.add_argument(
        "--context", "-c",
        choices=["detailed", "concise", "technical"],
        default="detailed",
        help="Vastuse tüüp",
    )
    cli.add_argument(
        "--interactive", "-i",
        action="store_true",
        help="Interaktiivne režiim",
    )

    options = cli.parse_args()

    # --interactive takes precedence; otherwise a query file wins over a
    # single query, and with no arguments the interactive mode is the default.
    if not options.interactive and options.file:
        batch_mode(options.file)
    elif not options.interactive and options.query:
        single_query(options.query, options.type, options.context)
    else:
        interactive_mode()
|