fetch_and_convert.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. #!/usr/bin/env python3
  2. """
  3. Teadusartiklite CURL päring → Markdown → PDF konverter
  4. Kasutab jq asemel Pythoni JSON parsimist, et vältida HTML/JSON escaping probleeme
  5. """
  6. import subprocess
  7. import json
  8. import re
  9. from datetime import datetime
  10. # GraphQL päring
  11. QUERY = {
  12. "query": "{ Get { ScientificArticle { title source_file summary_et transport_context } } }"
  13. }
  14. def fetch_articles():
  15. """Toob artiklid Weaviate GraphQL API-st"""
  16. print("📡 Toon artikleid GraphQL API-st...")
  17. try:
  18. result = subprocess.run(
  19. [
  20. 'curl',
  21. '-s',
  22. 'http://100.80.222.54:9020/v1/graphql',
  23. '-X', 'POST',
  24. '-H', 'Content-Type: application/json',
  25. '-d', json.dumps(QUERY)
  26. ],
  27. capture_output=True,
  28. text=True,
  29. timeout=30
  30. )
  31. if result.returncode != 0:
  32. print(f"❌ CURL viga: {result.stderr}")
  33. return None
  34. data = json.loads(result.stdout)
  35. articles = data.get('data', {}).get('Get', {}).get('ScientificArticle', [])
  36. print(f"✅ Leidsin {len(articles)} artiklit")
  37. return articles
  38. except Exception as e:
  39. print(f"❌ Viga: {e}")
  40. return None
  41. def extract_transport_context(transport_context):
  42. """Eraldab transport_context JSON-i võtmväljad loetaval kujul"""
  43. if not transport_context:
  44. return "Andmeid pole saadaval"
  45. result_parts = []
  46. try:
  47. # Kui transport_context on string, parsime seda
  48. if isinstance(transport_context, str):
  49. # Eemalda HTML/JSON sildid
  50. text = transport_context
  51. text = re.sub(r'```json\s*', '', text)
  52. text = re.sub(r'\s*```', '', text)
  53. text = text.strip()
  54. # Proovime JSON-i parsida
  55. try:
  56. obj = json.loads(text)
  57. except:
  58. # Kui ei õnnestu, tagastame stringi
  59. return f"```\n{text[:500]}...\n```"
  60. else:
  61. obj = transport_context
  62. # Parsime 'analysis' välja kui see on string
  63. if isinstance(obj, dict) and 'analysis' in obj:
  64. analysis = obj['analysis']
  65. if isinstance(analysis, str):
  66. # Puhastame JSON markerid
  67. analysis = re.sub(r'```json\s*', '', analysis)
  68. analysis = re.sub(r'\s*```', '', analysis)
  69. analysis = analysis.strip()
  70. # Parsime JSON
  71. try:
  72. analysis_obj = json.loads(analysis)
  73. obj = analysis_obj
  74. except:
  75. # Kui ei õnnestu, kasutame regex'i
  76. pass
  77. # Eraldame võtmväljad
  78. if isinstance(obj, dict):
  79. # Teoreetiline panus
  80. if obj.get('theoretical_contribution'):
  81. result_parts.append(
  82. "#### Teoreetiline panus\n\n" +
  83. obj['theoretical_contribution']
  84. )
  85. # Praktiline rakendatavus
  86. if obj.get('practical_applicability'):
  87. result_parts.append(
  88. "#### Praktiline rakendatavus\n\n" +
  89. obj['practical_applicability']
  90. )
  91. # Probleemilahendus
  92. if obj.get('problem_solving'):
  93. result_parts.append(
  94. "#### Probleemilahendus\n\n" +
  95. obj['problem_solving']
  96. )
  97. # Piirangud
  98. if obj.get('limitations'):
  99. result_parts.append(
  100. "#### Piirangud\n\n" +
  101. obj['limitations']
  102. )
  103. # Relevantsuse skoor
  104. score = obj.get('relevance_score')
  105. if score is not None:
  106. result_parts.append(
  107. f"**Relevantsuse skoor:** {score}/10"
  108. )
  109. return "\n\n".join(result_parts) if result_parts else "Andmeid pole saadaval"
  110. except Exception as e:
  111. return f"Viga parsimisega: {str(e)}"
  112. def generate_markdown(articles):
  113. """Genereerib markdown faili artiklitest"""
  114. print("✍️ Genereerin markdown faili...")
  115. # CSS lehevahetuste jaoks
  116. css_header = """<style>
  117. @media print {{
  118. h2 {{
  119. page-break-before: always;
  120. }}
  121. h2:first-of-type {{
  122. page-break-before: avoid;
  123. }}
  124. }}
  125. </style>
  126. # Teadusartiklite analüüs ja transpordiplaneerimise kontekst
  127. Eksporditud: **{timestamp}**
  128. Artikleid kokku: **{count}**
  129. ---
  130. """.format(
  131. timestamp=datetime.now().strftime("%d.%m.%Y %H:%M"),
  132. count=len(articles)
  133. )
  134. # Genereerime artiklite sektsioonid
  135. content = css_header
  136. for i, article in enumerate(articles, 1):
  137. # Artikli pealkiri ja metadata
  138. content += f"\n## {i}. {article['title']}\n\n"
  139. # Allikfail
  140. source = article.get('source_file', 'N/A')
  141. if source:
  142. source_name = source.split('/')[-1] # Võta ainult failinimi
  143. content += f"**Allikfail:** `{source_name}`\n\n"
  144. # Kokkuvõte
  145. summary = article.get('summary_et', '')
  146. if summary:
  147. content += "### Kokkuvõte (eesti keeles)\n\n"
  148. content += summary + "\n\n"
  149. # Transpordiplaneerimise kontekst
  150. transport = article.get('transport_context')
  151. if transport:
  152. content += "### Transpordiplaneerimise kontekst\n\n"
  153. context_text = extract_transport_context(transport)
  154. content += context_text + "\n\n"
  155. content += "---\n"
  156. # Lõppeinfo
  157. content += f"""
  158. ## Lõppinfo
  159. - **Eksporditud:** {datetime.now().strftime("%d.%m.%Y %H:%M:%S")}
  160. - **Kokku artikle:** {len(articles)}
  161. - **Allikas:** Weaviate teadusartiklite andmebaas
  162. Fail konverteeritud Markdown → PDF VS Code Markdown PDF laiendusega.
  163. """
  164. return content
  165. def save_markdown(content, filepath):
  166. """Salvestab markdown failina"""
  167. try:
  168. with open(filepath, 'w', encoding='utf-8') as f:
  169. f.write(content)
  170. print(f"✅ Markdown fail salvestatud: {filepath}")
  171. return True
  172. except Exception as e:
  173. print(f"❌ Viga faili salvestamisel: {e}")
  174. return False
  175. def main():
  176. print("=" * 60)
  177. print("TEADUSARTIKLITE EKSPORT MARKDOWN/PDF FORMAATI")
  178. print("=" * 60)
  179. # 1. Toome artikleid
  180. articles = fetch_articles()
  181. if not articles:
  182. print("❌ Viga: ei saanud artikleid tuua")
  183. return
  184. # 2. Genereerime markdown
  185. markdown = generate_markdown(articles)
  186. # 3. Salvestame
  187. output_path = "/home/ardo/Downloads/articles_with_transport_context.md"
  188. if save_markdown(markdown, output_path):
  189. print("\n" + "=" * 60)
  190. print("✅ VALMIS!")
  191. print("=" * 60)
  192. print(f"\n📄 Markdown fail: {output_path}")
  193. print("\n🚀 Järgmised sammud:")
  194. print(" 1. Avage fail VS Code'is")
  195. print(" 2. Paremklõps peal → 'Markdown PDF: Export (pdf)'")
  196. print(" 3. PDF fail luuakse samasse kausta")
  197. print("\n💡 Nipp: Iga artikkel algab uuelt lehelt!")
  198. else:
  199. print("❌ Viga: ei saanud faili salvestada")
  200. if __name__ == "__main__":
  201. main()