query_client.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. #!/usr/bin/env python3
  2. """
  3. Päringukliendi näide Weaviate'i baasile
  4. """
  5. import sys
  6. import os
  7. import json
  8. from datetime import datetime
  9. import re
  10. # Lisa src kaust Pythoni teele
  11. sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
  12. from src.query_engine import QueryEngine
  13. def print_result(result: dict):
  14. """Prindi tulemus ilusalt"""
  15. print("\n" + "="*80)
  16. print(f"PÄRING: {result.get('query', '')}")
  17. print(f"TÜÜP: {result.get('search_type', '')} | LEITI: {result.get('articles_found', 0)} artiklit")
  18. print(f"USALDUSVÄÄRSUS: {result.get('confidence', 0):.2%}")
  19. print("="*80)
  20. # Vastus
  21. print("\n📝 VASTUS:")
  22. print(result.get('answer', ''))
  23. # Allikad
  24. sources = result.get('sources', [])
  25. if sources:
  26. print(f"\n📚 ALLIKAD ({len(sources)}):")
  27. for i, source in enumerate(sources, 1):
  28. print(f"\n{i}. {source.get('title', '')}")
  29. print(f" Autorid: {', '.join(source.get('authors', []))}")
  30. print(f" Aasta: {source.get('year', '')} | Žurnaal: {source.get('journal', '')}")
  31. print(f" Relevantsus: {source.get('relevance_score', 5)}/10")
  32. print(f" Skoor: {source.get('score', 0):.3f}")
  33. if source.get('summary'):
  34. print(f" Kokkuvõte: {source.get('summary')}")
  35. print("\n" + "="*80)
  36. print(f"Kuupäev: {result.get('timestamp', '')}")
  37. def interactive_mode():
  38. """Interaktiivne režiim"""
  39. print("🤖 TRANSFORDIARTIKLITE PÄRINGUMOOTOR")
  40. print("="*60)
  41. print("Kasuta järgmisi käske:")
  42. print(" ? - Abi")
  43. print(" q - Välju")
  44. print(" t [päring] - Tehniline vastus")
  45. print(" l [päring] - Lühike vastus")
  46. print(" d [päring] - Detailne vastus (vaikimisi)")
  47. print(" s [päring] - Semantiline otsing")
  48. print(" k [päring] - Võtmesõnade otsing")
  49. print(" h [päring] - Hübriidotsing (vaikimisi)")
  50. print("="*60)
  51. engine = QueryEngine()
  52. while True:
  53. try:
  54. user_input = input("\n> ").strip()
  55. if not user_input:
  56. continue
  57. if user_input.lower() in ['q', 'quit', 'exit']:
  58. print("Head aega!")
  59. break
  60. if user_input == '?':
  61. print("""
  62. Käsud:
  63. t [päring] - Tehniline vastus (spetsialistidele)
  64. l [päring] - Lühike vastus (kokkuvõte)
  65. d [päring] - Detailne vastus (põhjalik analüüs)
  66. s [päring] - Semantiline otsing (mõttesisu)
  67. k [päring] - Võtmesõnade otsing (tekstipõhine)
  68. h [päring] - Hübriidotsing (mõlemad)
  69. Näited:
  70. t kuidas parandada teede ohutust
  71. l elektriautod linnaliikluses
  72. d jalgrataste ja autode jagatud teed
  73. """)
  74. continue
  75. # Parse command
  76. parts = user_input.split(' ', 1)
  77. if len(parts) < 2:
  78. print("❌ Vigane käsk. Kasuta: [käsk] [päring]")
  79. continue
  80. command, query = parts[0].lower(), parts[1]
  81. # Determine parameters
  82. if command == 't':
  83. context_type = "technical"
  84. search_type = "hybrid"
  85. elif command == 'l':
  86. context_type = "concise"
  87. search_type = "hybrid"
  88. elif command == 'd':
  89. context_type = "detailed"
  90. search_type = "hybrid"
  91. elif command == 's':
  92. context_type = "detailed"
  93. search_type = "semantic"
  94. elif command == 'k':
  95. context_type = "detailed"
  96. search_type = "keyword"
  97. elif command == 'h':
  98. context_type = "detailed"
  99. search_type = "hybrid"
  100. else:
  101. print(f"❌ Tundmatu käsk: {command}")
  102. continue
  103. print(f"🔍 Otsin: '{query}'...")
  104. result = engine.ask(query, search_type=search_type, context_type=context_type)
  105. print_result(result)
  106. except KeyboardInterrupt:
  107. print("\n\nVäljun...")
  108. break
  109. except Exception as e:
  110. print(f"❌ Viga: {str(e)}")
  111. engine.close()
  112. def batch_mode(queries_file: str):
  113. """Pakettrežiim - päringud failist"""
  114. try:
  115. with open(queries_file, 'r', encoding='utf-8') as f:
  116. queries = [line.strip() for line in f if line.strip()]
  117. except FileNotFoundError:
  118. print(f"❌ Faili {queries_file} ei leitud")
  119. return
  120. engine = QueryEngine()
  121. results = []
  122. print(f"⏳ Töötlen {len(queries)} päringut...")
  123. for i, query in enumerate(queries, 1):
  124. print(f" {i}/{len(queries)}: '{query}'")
  125. try:
  126. result = engine.ask(query)
  127. results.append(result)
  128. # Salvesta iga tulemus eraldi faili
  129. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  130. filename = f"tmp/query_result_{timestamp}_{i}.json"
  131. with open(filename, 'w', encoding='utf-8') as f:
  132. json.dump(result, f, ensure_ascii=False, indent=2)
  133. print(f" ✓ Salvestatud {filename}")
  134. # Konverteerime json'i markdown failiks
  135. convert_json_file_to_markdown(filename)
  136. except Exception as e:
  137. print(f" ✗ Viga: {str(e)}")
  138. results.append({
  139. "query": query,
  140. "error": str(e),
  141. "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  142. })
  143. engine.close()
  144. # Salvesta kõik tulemused ühte faili
  145. summary_file = f"tmp/query_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
  146. with open(summary_file, 'w', encoding='utf-8') as f:
  147. json.dump(results, f, ensure_ascii=False, indent=2)
  148. print(f"\n✅ Valmis! Tulemused salvestatud faili: {summary_file}")
  149. # Konverteerime json'i markdown failiks
  150. convert_json_file_to_markdown(summary_file)
  151. def single_query(query: str, search_type: str = "hybrid", context_type: str = "detailed"):
  152. """Üksik päring"""
  153. engine = QueryEngine()
  154. print(f"🔍 Päring: {query}")
  155. result = engine.ask(query, search_type=search_type, context_type=context_type)
  156. print_result(result)
  157. engine.close()
  158. # Salvesta tulemus
  159. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  160. filename = f"tmp/query_{timestamp}.json"
  161. with open(filename, 'w', encoding='utf-8') as f:
  162. json.dump(result, f, ensure_ascii=False, indent=2)
  163. print(f"📄 Tulemus salvestatud faili: {filename}")
  164. # Konverteerime json'i markdown failiks
  165. convert_json_file_to_markdown(filename)
  166. def json_to_markdown(json_data):
  167. """
  168. Konverteerib transpordiartiklite analüüsi JSON-vormingu Markdown-vormingusse
  169. """
  170. markdown_content = "# Transpordiartiklite analüüsi kokkuvõte\n\n"
  171. for i, query_result in enumerate(json_data):
  172. # Pealkiri
  173. query = query_result.get("query", f"Päring {i+1}")
  174. markdown_content += f"## {i+1}. {query}\n\n"
  175. # Metaandmed
  176. timestamp = query_result.get("timestamp", "")
  177. articles_found = query_result.get("articles_found", 0)
  178. confidence = query_result.get("confidence", 0)
  179. markdown_content += f"**Aeg:** {timestamp}\n"
  180. markdown_content += f"**Leitud artikleid:** {articles_found}\n"
  181. markdown_content += f"**Usaldusväärsus:** {confidence:.2f}\n\n"
  182. # Põhivastus (answer)
  183. answer = query_result.get("answer", "")
  184. if answer:
  185. # Eemaldame liigsed reavahetused ja vormindame
  186. answer = re.sub(r'\n{3,}', '\n\n', answer.strip())
  187. markdown_content += "### Analüüs\n\n"
  188. markdown_content += f"{answer}\n\n"
  189. # Allikad
  190. sources = query_result.get("sources", [])
  191. if sources:
  192. markdown_content += "### Allikad\n\n"
  193. for j, source in enumerate(sources):
  194. title = source.get("title", "Pealkiri puudub")
  195. authors = ", ".join(source.get("authors", ["Autor puudub"]))
  196. year = source.get("year", "Aasta puudub")
  197. summary = source.get("summary", source.get("full_summary", "Kokkuvõte puudub"))
  198. # Võtame kokkuvõttest esimesed 2-3 lauset
  199. summary_preview = " ".join(summary.split()[:50])
  200. if len(summary.split()) > 50:
  201. summary_preview += "..."
  202. markdown_content += f"#### {j+1}. {title}\n"
  203. markdown_content += f"**Autor(id):** {authors}\n"
  204. markdown_content += f"**Aasta:** {year}\n"
  205. markdown_content += f"**Kokkuvõte:** {summary_preview}\n\n"
  206. markdown_content += "---\n\n"
  207. return markdown_content
  208. def convert_json_file_to_markdown(json_filename):
  209. # Loeme JSON-faili sisu (antud juhul on see juba muutujas)
  210. # Tegelikus rakenduses loeksime failist:
  211. with open(json_filename, 'r', encoding='utf-8') as f:
  212. json_data = json.load(f)
  213. # Teisenda Markdowniks
  214. # Kui on dict ja sisaldab päringu välju, pane listi
  215. if isinstance(json_data, dict):
  216. markdown_content = json_to_markdown([json_data])
  217. # Kui on list, kasuta otse
  218. elif isinstance(json_data, list):
  219. markdown_content = json_to_markdown(json_data)
  220. # Muudel juhtudel proovi listina
  221. else:
  222. markdown_content = "# Viga\n\nAndmed pole sobivas vormingus.\n"
  223. # Loo Markdown failinimi
  224. base_name = os.path.splitext(json_filename)[0]
  225. md_filename = f"{base_name}.md"
  226. # Salvesta Markdown fail
  227. with open(md_filename, 'w', encoding='utf-8') as f:
  228. f.write(markdown_content)
  229. print(f"✅ Markdown fail loodud: {md_filename}")
  230. return md_filename
  231. if __name__ == "__main__":
  232. #convert_json_file_to_markdown('tmp/query_summary_20260111_215300.json')
  233. import argparse
  234. parser = argparse.ArgumentParser(description="Transpordiartiklite päringumootor")
  235. parser.add_argument("--query", "-q", help="Üksik päring")
  236. parser.add_argument("--file", "-f", help="Päringute fail (üks päring real)")
  237. parser.add_argument("--type", "-t", choices=["semantic", "keyword", "hybrid"],
  238. default="hybrid", help="Otsingu tüüp")
  239. parser.add_argument("--context", "-c", choices=["detailed", "concise", "technical"],
  240. default="detailed", help="Vastuse tüüp")
  241. parser.add_argument("--interactive", "-i", action="store_true",
  242. help="Interaktiivne režiim")
  243. args = parser.parse_args()
  244. if args.interactive:
  245. interactive_mode()
  246. elif args.file:
  247. batch_mode(args.file)
  248. elif args.query:
  249. single_query(args.query, args.type, args.context)
  250. else:
  251. # Vaikimisi interaktiivne režiim
  252. interactive_mode()