| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 |
- #!/usr/bin/env python3
- """
- Teadusartiklite CURL päring → Markdown → PDF konverter
- Kasutab jq asemel Pythoni JSON parsimist, et vältida HTML/JSON escaping probleeme
- """
- import subprocess
- import json
- import re
- from datetime import datetime
# GraphQL request payload: asks for the title, source_file, summary_et and
# transport_context properties of every ScientificArticle object.
QUERY = dict(
    query="{ Get { ScientificArticle { title source_file summary_et transport_context } } }",
)
def fetch_articles(url='http://100.80.222.54:9020/v1/graphql', timeout=30):
    """Fetch scientific articles from the Weaviate GraphQL API using curl.

    The module-level ``QUERY`` payload is POSTed as JSON to *url*.

    Args:
        url: GraphQL endpoint to query. Defaults to the endpoint this script
            was written for.
        timeout: Seconds to wait for the curl process before giving up.

    Returns:
        A list of article dicts (possibly empty) on success, or ``None`` when
        curl fails, times out, returns invalid JSON, or the server reports
        GraphQL errors without any usable data.
    """
    print("📡 Toon artikleid GraphQL API-st...")

    command = [
        'curl',
        # -s hides the progress meter; -S keeps real error messages on
        # stderr so the failure report below is not empty.
        '-sS',
        url,
        '-X', 'POST',
        '-H', 'Content-Type: application/json',
        '-d', json.dumps(QUERY),
    ]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            # Decode as UTF-8 explicitly rather than relying on the locale
            # encoding, which may not be UTF-8.
            encoding='utf-8',
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError) as e:
        # OSError: curl is not installed / not executable.
        # SubprocessError: includes TimeoutExpired.
        print(f"❌ Viga: {e}")
        return None

    if result.returncode != 0:
        print(f"❌ CURL viga: {result.stderr}")
        return None

    try:
        data = json.loads(result.stdout)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"❌ Viga: {e}")
        return None

    # GraphQL reports query problems in a top-level "errors" list, often
    # together with "data": null.
    errors = data.get('errors') if isinstance(data, dict) else None
    if errors:
        print(f"❌ GraphQL viga: {errors}")

    # Walk data -> Get -> ScientificArticle, tolerating null / missing levels.
    articles = data
    for key in ('data', 'Get', 'ScientificArticle'):
        articles = articles.get(key) if isinstance(articles, dict) else None
    if not isinstance(articles, list):
        articles = []

    if errors and not articles:
        return None

    print(f"✅ Leidsin {len(articles)} artiklit")
    return articles
# (JSON key, Markdown heading) pairs, in the order they appear in the report.
_CONTEXT_SECTIONS = (
    ('theoretical_contribution', "Teoreetiline panus"),
    ('practical_applicability', "Praktiline rakendatavus"),
    ('problem_solving', "Probleemilahendus"),
    ('limitations', "Piirangud"),
)

# Maximum number of characters of unparseable text echoed back to the report.
_RAW_TEXT_LIMIT = 500


def _strip_code_fences(text):
    """Remove Markdown ```json / ``` fence markers and surrounding whitespace."""
    text = re.sub(r'```json\s*', '', text)
    text = re.sub(r'\s*```', '', text)
    return text.strip()


def _render_field(value):
    """Render one field value as Markdown text; lists become bullet lists."""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(f"- {item}" for item in value)
    return str(value)


def extract_transport_context(transport_context):
    """Turn a transport_context value into readable Markdown sections.

    Args:
        transport_context: Either a dict, or a JSON string (optionally wrapped
            in ```json fences). If it contains an ``analysis`` entry that is a
            dict or a JSON string, the fields of that nested object are used,
            falling back to the outer object for fields it does not define.

    Returns:
        Markdown text with one ``####`` section per non-empty known field plus
        the relevance score, ``"Andmeid pole saadaval"`` when nothing usable
        is present, or a fenced excerpt of the raw text when a string input
        is not valid JSON.
    """
    if not transport_context:
        return "Andmeid pole saadaval"

    if isinstance(transport_context, str):
        text = _strip_code_fences(transport_context)
        try:
            obj = json.loads(text)
        except (ValueError, RecursionError):
            # Not JSON: show the raw text, marking truncation only when it
            # actually happened.
            snippet = text[:_RAW_TEXT_LIMIT]
            if len(text) > _RAW_TEXT_LIMIT:
                snippet += "..."
            return f"```\n{snippet}\n```"
    else:
        obj = transport_context

    if isinstance(obj, dict) and 'analysis' in obj:
        analysis = obj['analysis']
        if isinstance(analysis, str):
            try:
                analysis = json.loads(_strip_code_fences(analysis))
            except (ValueError, RecursionError):
                # Unparseable analysis: keep using the outer object.
                analysis = None
        if isinstance(analysis, dict):
            # Nested analysis fields take precedence over outer ones.
            obj = {**obj, **analysis}

    if not isinstance(obj, dict):
        return "Andmeid pole saadaval"

    result_parts = [
        f"#### {heading}\n\n{_render_field(obj[key])}"
        for key, heading in _CONTEXT_SECTIONS
        if obj.get(key)
    ]

    # A score of 0 is meaningful, so test against None rather than truthiness.
    score = obj.get('relevance_score')
    if score is not None:
        result_parts.append(f"**Relevantsuse skoor:** {score}/10")

    return "\n\n".join(result_parts) if result_parts else "Andmeid pole saadaval"
- def generate_markdown(articles):
- """Genereerib markdown faili artiklitest"""
-
- print("✍️ Genereerin markdown faili...")
-
- # CSS lehevahetuste jaoks
- css_header = """<style>
- @media print {{
- h2 {{
- page-break-before: always;
- }}
- h2:first-of-type {{
- page-break-before: avoid;
- }}
- }}
- </style>
- # Teadusartiklite analüüs ja transpordiplaneerimise kontekst
- Eksporditud: **{timestamp}**
- Artikleid kokku: **{count}**
- ---
- """.format(
- timestamp=datetime.now().strftime("%d.%m.%Y %H:%M"),
- count=len(articles)
- )
-
- # Genereerime artiklite sektsioonid
- content = css_header
-
- for i, article in enumerate(articles, 1):
-
- # Artikli pealkiri ja metadata
- content += f"\n## {i}. {article['title']}\n\n"
-
- # Allikfail
- source = article.get('source_file', 'N/A')
- if source:
- source_name = source.split('/')[-1] # Võta ainult failinimi
- content += f"**Allikfail:** `{source_name}`\n\n"
-
- # Kokkuvõte
- summary = article.get('summary_et', '')
- if summary:
- content += "### Kokkuvõte (eesti keeles)\n\n"
- content += summary + "\n\n"
-
- # Transpordiplaneerimise kontekst
- transport = article.get('transport_context')
- if transport:
- content += "### Transpordiplaneerimise kontekst\n\n"
- context_text = extract_transport_context(transport)
- content += context_text + "\n\n"
-
- content += "---\n"
-
- # Lõppeinfo
- content += f"""
- ## Lõppinfo
- - **Eksporditud:** {datetime.now().strftime("%d.%m.%Y %H:%M:%S")}
- - **Kokku artikle:** {len(articles)}
- - **Allikas:** Weaviate teadusartiklite andmebaas
- Fail konverteeritud Markdown → PDF VS Code Markdown PDF laiendusega.
- """
-
- return content
def save_markdown(content, filepath):
    """Write *content* to *filepath* as UTF-8 text.

    Prints a confirmation or an error message and returns True on success,
    False if anything went wrong while writing.
    """
    try:
        with open(filepath, mode='w', encoding='utf-8') as out_file:
            out_file.write(content)
            print(f"✅ Markdown fail salvestatud: {filepath}")
    except Exception as err:
        print(f"❌ Viga faili salvestamisel: {err}")
        return False
    else:
        return True
def main():
    """Run the export: fetch the articles, render Markdown, save the file.

    Progress and the follow-up instructions are printed to stdout. The
    function returns early (after printing an error) when no articles
    could be fetched or the file could not be written.
    """
    rule = "=" * 60
    print(rule)
    print("TEADUSARTIKLITE EKSPORT MARKDOWN/PDF FORMAATI")
    print(rule)

    # Step 1: fetch the articles.
    articles = fetch_articles()
    if not articles:
        print("❌ Viga: ei saanud artikleid tuua")
        return

    # Step 2: render them as Markdown.
    markdown = generate_markdown(articles)

    # Step 3: write the result to disk.
    output_path = "/home/ardo/Downloads/articles_with_transport_context.md"
    if not save_markdown(markdown, output_path):
        print("❌ Viga: ei saanud faili salvestada")
        return

    print("\n" + rule)
    print("✅ VALMIS!")
    print(rule)

    next_steps = (
        f"\n📄 Markdown fail: {output_path}",
        "\n🚀 Järgmised sammud:",
        " 1. Avage fail VS Code'is",
        " 2. Paremklõps peal → 'Markdown PDF: Export (pdf)'",
        " 3. PDF fail luuakse samasse kausta",
        "\n💡 Nipp: Iga artikkel algab uuelt lehelt!",
    )
    for line in next_steps:
        print(line)
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|