|
|
@@ -1,43 +1,45 @@
|
|
|
-# save_articles_to_pdf.py
|
|
|
-
|
|
|
import os
|
|
|
import sys
|
|
|
import re
|
|
|
from datetime import datetime
|
|
|
-from reportlab.lib.pagesizes import letter, A4
|
|
|
+
|
|
|
+from reportlab.lib.pagesizes import A4
|
|
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
|
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
|
-from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER, TA_LEFT
|
|
|
+from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT
|
|
|
from reportlab.lib import colors
|
|
|
-from reportlab.lib.units import inch, cm
|
|
|
-from reportlab.pdfbase import pdfmetrics
|
|
|
-from reportlab.pdfbase.ttfonts import TTFont
|
|
|
+from reportlab.lib.units import cm
|
|
|
+
|
|
|
import json
|
|
|
+import traceback
|
|
|
|
|
|
# Lisa src kaust Pythoni teele
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
-
|
|
|
from src.weaviate_client import WeaviateClient
|
|
|
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# STRINGI PUHASTAMISE FUNKTSIOONID
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
def clean_html_tags(text):
|
|
|
"""Puhasta tekst HTML/XML siltidest ReportLab-i jaoks"""
|
|
|
if not text:
|
|
|
return ""
|
|
|
-
|
|
|
- # Eemalda kõik HTML/XML siltid
|
|
|
+ # Eemalda kõik HTML/XML sildid
|
|
|
text = re.sub(r'<[^>]+>', '', text)
|
|
|
-
|
|
|
+
|
|
|
# Asenda erimärgid ReportLab-ile sobivate märkidega
|
|
|
replacements = {
|
|
|
- ' ': ' ',
|
|
|
- '&': '&',
|
|
|
- '<': '<',
|
|
|
- '>': '>',
|
|
|
- '"': '"',
|
|
|
- ''': "'",
|
|
|
- ''': "'",
|
|
|
- '\u00a0': ' ', # mitte-tühik
|
|
|
- '\u2026': '...', # ellipsis
|
|
|
+ ' ': ' ',
|
|
|
+ '&': '&',
|
|
|
+ '<': '<',
|
|
|
+ '>': '>',
|
|
|
+ '"': '"',
|
|
|
+ '’': "'",
|
|
|
+ '‘': "'",
|
|
|
+ '\u00a0': ' ', # mitte-tühik
|
|
|
+ '\u2026': '...', # ellipsis
|
|
|
'\u2013': '-', # n-sild
|
|
|
'\u2014': '-', # m-sild
|
|
|
'\u2018': "'", # vasak ülakoma
|
|
|
@@ -45,206 +47,327 @@ def clean_html_tags(text):
|
|
|
'\u201c': '"', # vasak jutumärk
|
|
|
'\u201d': '"', # parem jutumärk
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
for old, new in replacements.items():
|
|
|
text = text.replace(old, new)
|
|
|
-
|
|
|
+
|
|
|
return text
|
|
|
|
|
|
+
|
|
|
def clean_markdown_for_pdf(text):
|
|
|
"""Konverteeri markdown ReportLab-ile sobivaks tekstiks"""
|
|
|
if not text:
|
|
|
return ""
|
|
|
-
|
|
|
+
|
|
|
# Kui ei ole string, konverteeri stringiks
|
|
|
if not isinstance(text, str):
|
|
|
text = str(text)
|
|
|
-
|
|
|
- # Eemalda HTML siltid
|
|
|
+
|
|
|
+ # Eemalda HTML sildid
|
|
|
text = clean_html_tags(text)
|
|
|
-
|
|
|
- # Asenda markdown pealkirjad
|
|
|
- text = re.sub(r'#{1,6}\s+', '', text) # Eemalda # pealkirjad
|
|
|
- text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # Asenda **bold** lihtsalt tekstiga
|
|
|
- text = re.sub(r'\*(.+?)\*', r'\1', text) # Asenda *italic* lihtsalt tekstiga
|
|
|
- text = re.sub(r'__(.+?)__', r'\1', text) # Asenda __underline__ lihtsalt tekstiga
|
|
|
- text = re.sub(r'~~(.+?)~~', r'\1', text) # Asenda ~~strikethrough~~ lihtsalt tekstiga
|
|
|
-
|
|
|
- # Asenda loetelud (PARANDATUD: kasuta \\1 mitte \1)
|
|
|
+
|
|
|
+ # Asenda markdown pealkirjad (# # # jne)
|
|
|
+ text = re.sub(r'#{1,6}\s+', '', text)
|
|
|
+
|
|
|
+ # Asenda bold, italic, strikethrough markeeringud
|
|
|
+ text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # **bold**
|
|
|
+ text = re.sub(r'\*(.+?)\*', r'\1', text) # *italic*
|
|
|
+ text = re.sub(r'__(.+?)__', r'\1', text) # __underline__
|
|
|
+ text = re.sub(r'~~(.+?)~~', r'\1', text) # ~~strikethrough~~
|
|
|
+
|
|
|
+ # Asenda loetelud
|
|
|
text = re.sub(r'^\s*[-*+]\s+', '• ', text, flags=re.MULTILINE)
|
|
|
- text = re.sub(r'^\s*(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE) # PARANDATUD
|
|
|
-
|
|
|
- # Asenda koodiblokid
|
|
|
- text = re.sub(r'```[^`]+```', '', text) # Eemalda koodiblokid
|
|
|
- text = re.sub(r'`([^`]+)`', r'[\1]', text) # Asenda inline kood
|
|
|
-
|
|
|
- # Asenda lingid
|
|
|
- text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # Eemalda lingid
|
|
|
-
|
|
|
+ text = re.sub(r'^\s*(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)
|
|
|
+
|
|
|
+ # Eemalda koodiblokid
|
|
|
+ text = re.sub(r'```[^`]+```', '', text)
|
|
|
+ text = re.sub(r'`([^`]+)`', r'[\1]', text)
|
|
|
+
|
|
|
+ # Eemalda lingid (jäta ainult tekst)
|
|
|
+ text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
|
|
|
+
|
|
|
# Eemalda liigsed tühikud
|
|
|
text = re.sub(r'\s+', ' ', text)
|
|
|
-
|
|
|
+
|
|
|
return text.strip()
|
|
|
|
|
|
-def clean_json_markers(text):
|
|
|
+
|
|
|
+def format_summary_for_pdf(summary):
|
|
|
+ """Vorminda kokkuvõte PDF-ile sobivaks"""
|
|
|
+ if not summary:
|
|
|
+ return ""
|
|
|
+
|
|
|
+ # Kui ei ole string, konverteeri
|
|
|
+ if not isinstance(summary, str):
|
|
|
+ summary = str(summary)
|
|
|
+
|
|
|
+ # Eemalda kõik vormindus
|
|
|
+ summary = clean_markdown_for_pdf(summary)
|
|
|
+
|
|
|
+ # Lisa uued read peamiste sektsioonide ette
|
|
|
+ summary = summary.replace('1. ARTIKLI PEAMISED PUNKTID:', '\n1. ARTIKLI PEAMISED PUNKTID:\n')
|
|
|
+ summary = summary.replace('2. KASUTATUD MEETODID:', '\n\n2. KASUTATUD MEETODID:\n')
|
|
|
+ summary = summary.replace('3. PEAMISED TULEMUSED:', '\n\n3. PEAMISED TULEMUSED:\n')
|
|
|
+ summary = summary.replace('4. JÄRELDUSED JA SOOVITUSED:', '\n\n4. JÄRELDUSED JA SOOVITUSED:\n')
|
|
|
+ summary = summary.replace('5. TRANSFORDIPLANEERIMISE KONTEKST:', '\n\n5. TRANSFORDIPLANEERIMISE KONTEKST:\n')
|
|
|
+
|
|
|
+ # Asenda liigsed reavahetused
|
|
|
+ summary = re.sub(r'\n{3,}', '\n\n', summary)
|
|
|
+
|
|
|
+ # Lõika liiga pikk tekst
|
|
|
+ if len(summary) > 4000:
|
|
|
+ summary = summary[:4000] + "... [kokkuvõte lõigatud, liiga pikk]"
|
|
|
+
|
|
|
+ return summary
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# TRANSPORT KONTEKSTI PARSING
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
+def extract_json_field(json_string, field_name):
|
|
|
"""
|
|
|
- Eemaldab JSON ümber olevad ```json ja ``` markerid
|
|
|
+ Eralda JSON stringist konkreetne väli regex abil.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ json_string: JSON tekst stringina
|
|
|
+ field_name: välja nimi (nt "theoretical_contribution")
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ Välja väärtus või None
|
|
|
"""
|
|
|
- # Eemalda algusest
|
|
|
- text = re.sub(r'^```json\s*', '', text, flags=re.MULTILINE)
|
|
|
- # Eemalda lõpust
|
|
|
- text = re.sub(r'\s*```$', '', text, flags=re.MULTILINE)
|
|
|
- # Eemalda kõikjalt kui on
|
|
|
- text = re.sub(r'\s*```(json)?\s*', '', text)
|
|
|
- return text.strip()
|
|
|
+ if not json_string:
|
|
|
+ return None
|
|
|
|
|
|
-def clean_json_string(text):
|
|
|
- """Puhasta JSON string"""
|
|
|
- # Eemalda reavahetused ja liigsed tühikud
|
|
|
- text = text.replace('\n', '').replace('\r', '')
|
|
|
- # Asenda \\" tavaliste jutumärkidega
|
|
|
- text = text.replace('\\"', '"')
|
|
|
- # Eemalda esimesed ja viimased jutumärgid kui vaja
|
|
|
- if text.startswith('"') and text.endswith('"'):
|
|
|
- text = text[1:-1]
|
|
|
- return text
|
|
|
+ pattern = rf'"{field_name}":\s*"([^"]*(?:\\"[^"]*)*)"'
|
|
|
+ match = re.search(pattern, json_string)
|
|
|
+ if match:
|
|
|
+ return match.group(1)
|
|
|
+ return None
|
|
|
|
|
|
-def extract_and_format_json(data):
|
|
|
- """Eralda ja vorminda JSON andmed"""
|
|
|
- formatted_parts = []
|
|
|
-
|
|
|
- # Võti-väärtus paaride kaart
|
|
|
- key_map = {
|
|
|
- "theoretical_contribution": "Theoretical contribution",
|
|
|
- "practical_applicability": "Practical applicability",
|
|
|
- "problem_solving": "Problem solving",
|
|
|
- "limitations": "Limitations",
|
|
|
- "future_research": "Future research",
|
|
|
- "methodology": "Methodology"
|
|
|
- }
|
|
|
+
|
|
|
+def extract_relevance_score(json_string):
|
|
|
+ """Eralda relevance_score JSON stringist või tekstist."""
|
|
|
+ if not json_string:
|
|
|
+ return None
|
|
|
+
|
|
|
+ pattern = r'"relevance_score":\s*(\d+(?:\.\d+)?)'
|
|
|
+ match = re.search(pattern, json_string)
|
|
|
+ if match:
|
|
|
+ return match.group(1)
|
|
|
+ return None
|
|
|
+
|
|
|
+
|
|
|
+def format_transport_context(transport_context):
|
|
|
+ """
|
|
|
+ Vorminda transpordi kontekst PDF-ile.
|
|
|
+ """
|
|
|
+ if not transport_context:
|
|
|
+ return None
|
|
|
+
|
|
|
+ # PARANDUS: Leia analüüsi tekst
|
|
|
+ analysis_text = None
|
|
|
|
|
|
- for key, title in key_map.items():
|
|
|
- if key in data and data[key]:
|
|
|
- formatted_parts.append(f"{title}")
|
|
|
- formatted_parts.append(str(data[key]))
|
|
|
- formatted_parts.append("") # tühi rida
|
|
|
+ # Kui on dict ja sisaldab 'analysis' võtit
|
|
|
+ if isinstance(transport_context, dict):
|
|
|
+ if 'analysis' in transport_context:
|
|
|
+ analysis_text = transport_context['analysis']
|
|
|
+ else:
|
|
|
+ # Võib-olla kogu dict ON juba analysis?
|
|
|
+ analysis_text = str(transport_context)
|
|
|
+ elif isinstance(transport_context, str):
|
|
|
+ analysis_text = transport_context
|
|
|
+ else:
|
|
|
+ return None
|
|
|
+
|
|
|
+ if not analysis_text:
|
|
|
+ return None
|
|
|
+
|
|
|
+ txt = str(analysis_text)
|
|
|
|
|
|
- return "\n".join(formatted_parts)
|
|
|
+ # EEMALDA ```json ... ``` markerid
|
|
|
+ txt = re.sub(r'```json\s*', '', txt)
|
|
|
+ txt = re.sub(r'\s*```', '', txt)
|
|
|
+ txt = txt.strip()
|
|
|
+
|
|
|
+ # DEBUG: Prindi välja pärast puhastamist
|
|
|
+ #print("DEBUG after cleanup:", txt[:200])
|
|
|
+
|
|
|
+ parsed = None
|
|
|
|
|
|
-def process_json_text(input_text):
|
|
|
- """Pööra JSON tekst loetavaks vorminguks"""
|
|
|
+ # 1) Proovi parsida JSON otse
|
|
|
try:
|
|
|
- # Parse esimene kiht
|
|
|
- parsed = json.loads(input_text)
|
|
|
-
|
|
|
- # Otsi analüüsi andmeid
|
|
|
- analysis_data = None
|
|
|
-
|
|
|
- # Variant 1: "analysis" väljal on JSON string
|
|
|
- if "analysis" in parsed:
|
|
|
+ parsed = json.loads(txt)
|
|
|
+ #print("✅ JSON parsed successfully!")
|
|
|
+ #print(f"DEBUG parsed keys: {list(parsed.keys())}")
|
|
|
+ except json.JSONDecodeError as e:
|
|
|
+ #print(f"❌ JSON parse failed: {e}")
|
|
|
+ # Kui ei õnnestu, proovi leida {...} blokk
|
|
|
+ m = re.search(r'\{.*\}', txt, flags=re.DOTALL)
|
|
|
+ if m:
|
|
|
+ json_candidate = m.group(0)
|
|
|
try:
|
|
|
- # Puhasta ja parse sisemine JSON
|
|
|
- clean_analysis = clean_json_string(str(parsed["analysis"]))
|
|
|
- analysis_data = json.loads(clean_analysis)
|
|
|
- except:
|
|
|
- # Kui ei saa JSON-iks, kasuta otse
|
|
|
- analysis_data = parsed.get("analysis", {})
|
|
|
-
|
|
|
- # Variant 2: andmed otse pealkirjade all
|
|
|
- elif any(key in parsed for key in ["theoretical_contribution", "practical_applicability"]):
|
|
|
- analysis_data = parsed
|
|
|
-
|
|
|
- # Variant 3: teised võimalused
|
|
|
- else:
|
|
|
- # Proovi leida JSON kuskil mujal
|
|
|
- for key, value in parsed.items():
|
|
|
- if isinstance(value, str) and any(x in value.lower() for x in ["theoretical", "practical", "contribution"]):
|
|
|
- try:
|
|
|
- clean_val = clean_json_string(value)
|
|
|
- analysis_data = json.loads(clean_val)
|
|
|
- break
|
|
|
- except:
|
|
|
- continue
|
|
|
-
|
|
|
- if analysis_data:
|
|
|
- return extract_and_format_json(analysis_data)
|
|
|
- else:
|
|
|
- return "No analysis data found in JSON"
|
|
|
+ parsed = json.loads(json_candidate)
|
|
|
+ #print("✅ JSON parsed from candidate!")
|
|
|
+ except json.JSONDecodeError as e2:
|
|
|
+ print(f"❌ Candidate parse also failed: {e2}")
|
|
|
+ parsed = None
|
|
|
+
|
|
|
+ formatted_parts = []
|
|
|
+
|
|
|
+ # Kui JSON parsimine õnnestus
|
|
|
+ if isinstance(parsed, dict):
|
|
|
+ #print("✅ Using parsed JSON dict")
|
|
|
+
|
|
|
+ # KUI parsed sisaldab 'analysis' võtit, siis see on VEEL ÜKS string!
|
|
|
+ # Peame seda UUESTI parsima!
|
|
|
+ # KUI parsed sisaldab 'analysis' võtit, siis see on VEEL ÜKS string!
|
|
|
+ if 'analysis' in parsed and isinstance(parsed['analysis'], str):
|
|
|
+ #print("⚠️ 'analysis' is still a string, parsing again...")
|
|
|
+ inner_txt = parsed['analysis']
|
|
|
+ # Eemalda ```json markerid uuesti
|
|
|
+ inner_txt = re.sub(r'```json\s*', '', inner_txt)
|
|
|
+ inner_txt = re.sub(r'\s*```', '', inner_txt)
|
|
|
+ inner_txt = inner_txt.strip()
|
|
|
+ # PARANDUS: Asenda valed escaped quotes
|
|
|
+ # \"word\" → "word" (ainult siis kui on tähtede vahel)
|
|
|
+ inner_txt = re.sub(r'\\"([a-züõäöA-ZÜÕÄÖ]+)\\"', r'"\1"', inner_txt)
|
|
|
|
|
|
- except json.JSONDecodeError as e:
|
|
|
- return f"JSON parsing error: {str(e)}"
|
|
|
- except Exception as e:
|
|
|
- return f"Error: {str(e)}"
|
|
|
-
|
|
|
-def parse_transport_context(context_data):
|
|
|
- context_data = clean_json_markers(context_data)
|
|
|
- """Parsi transpordikonteksti JSON-ist loetavaks"""
|
|
|
- if isinstance(context_data, str):
|
|
|
- # Proovi parsida string JSON-iks
|
|
|
- try:
|
|
|
- return json.loads(context_data)
|
|
|
- except json.JSONDecodeError:
|
|
|
- # Kui ei õnnestu, tagasta puhastatud string
|
|
|
- return {"raw_analysis": clean_markdown_for_pdf(context_data)}
|
|
|
- elif isinstance(context_data, dict):
|
|
|
- # Puhasta kõik stringiväljad
|
|
|
- cleaned = {}
|
|
|
- for key, value in context_data.items():
|
|
|
- if isinstance(value, str):
|
|
|
- cleaned[key] = clean_markdown_for_pdf(value)
|
|
|
- else:
|
|
|
- cleaned[key] = value
|
|
|
- return cleaned
|
|
|
- return context_data
|
|
|
-
|
|
|
-def format_context_for_pdf(parsed_context):
|
|
|
- """Vorminda parsitud kontekst PDF-i jaoks"""
|
|
|
- if isinstance(parsed_context, dict):
|
|
|
- formatted = []
|
|
|
-
|
|
|
- if 'theoretical_contribution' in parsed_context and parsed_context['theoretical_contribution']:
|
|
|
- formatted.append(f"TEOREETILINE PANUS: {parsed_context['theoretical_contribution']}")
|
|
|
-
|
|
|
- if 'practical_applicability' in parsed_context and parsed_context['practical_applicability']:
|
|
|
- formatted.append(f"PRAKTILINE RAKENDATAVUS: {parsed_context['practical_applicability']}")
|
|
|
-
|
|
|
- if 'problem_solving' in parsed_context and parsed_context['problem_solving']:
|
|
|
- formatted.append(f"PROBLEEMILAHDUS: {parsed_context['problem_solving']}")
|
|
|
-
|
|
|
- if 'limitations' in parsed_context and parsed_context['limitations']:
|
|
|
- formatted.append(f"PIIRANGUD: {parsed_context['limitations']}")
|
|
|
-
|
|
|
- if 'relevance_score' in parsed_context:
|
|
|
- formatted.append(f"RELEVANTSUSE SKOOR: {parsed_context['relevance_score']}/10")
|
|
|
-
|
|
|
- if 'analysis' in parsed_context and parsed_context['analysis']:
|
|
|
- formatted.append(f"ANALÜÜS: {parsed_context['analysis']}")
|
|
|
-
|
|
|
- if 'raw_analysis' in parsed_context and parsed_context['raw_analysis']:
|
|
|
- formatted.append(f"ANALÜÜS: {parsed_context['raw_analysis']}")
|
|
|
-
|
|
|
- return "\n\n".join(formatted)
|
|
|
- else:
|
|
|
- return clean_markdown_for_pdf(str(parsed_context))
|
|
|
+ # PARANDUS: Escape jutumärgid, mis on stringi väärtuste sees
|
|
|
+ # Leia kõik "key": "value" paare ja escape "value" sees olevad jutumärgid
|
|
|
+ def escape_quotes_in_values(match):
|
|
|
+ key = match.group(1)
|
|
|
+ value = match.group(2)
|
|
|
+ # Escape jutumärgid value sees
|
|
|
+ value_escaped = value.replace('"', '\\"')
|
|
|
+ return f'"{key}": "{value_escaped}"'
|
|
|
+
|
|
|
+ inner_txt = re.sub(r'"(\w+)":\s*"([^"]*(?:"[^"]*)*)"', escape_quotes_in_values, inner_txt)
|
|
|
+
|
|
|
+ try:
|
|
|
+ parsed = json.loads(inner_txt)
|
|
|
+ #print("✅ Inner JSON parsed successfully!")
|
|
|
+ #print(f"DEBUG inner parsed keys: {list(parsed.keys())}")
|
|
|
+ except json.JSONDecodeError as e:
|
|
|
+ #print(f"❌ Inner JSON parse failed: {e}")
|
|
|
+
|
|
|
+ # AGRESSIIVNE PARANDUS: kasuta regex fallback'i
|
|
|
+ #print("⚠️ Falling back to regex extraction...")
|
|
|
+
|
|
|
+ # Taasta originaal inner_txt (ilma escapimiseta)
|
|
|
+ inner_txt = parsed['analysis']
|
|
|
+ inner_txt = re.sub(r'```json\s*', '', inner_txt)
|
|
|
+ inner_txt = re.sub(r'\s*```', '', inner_txt)
|
|
|
+ inner_txt = inner_txt.strip()
|
|
|
+
|
|
|
+ # Kasuta regex'i otse inner_txt pealt
|
|
|
+ temp_parts = []
|
|
|
+
|
|
|
+ match = re.search(r'"theoretical_contribution":\s*"(.*?)"(?=\s*,\s*")', inner_txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ temp_parts.append("TEOREETILINE PANUS:\n" + match.group(1))
|
|
|
+
|
|
|
+ match = re.search(r'"practical_applicability":\s*"(.*?)"(?=\s*,\s*")', inner_txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ temp_parts.append("PRAKTILINE RAKENDATAVUS:\n" + match.group(1))
|
|
|
+
|
|
|
+ match = re.search(r'"problem_solving":\s*"(.*?)"(?=\s*,\s*")', inner_txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ temp_parts.append("PROBLEEMILAHENDUS:\n" + match.group(1))
|
|
|
+
|
|
|
+ match = re.search(r'"limitations":\s*"(.*?)"(?=\s*,\s*")', inner_txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ temp_parts.append("PIIRANGUD:\n" + match.group(1))
|
|
|
+
|
|
|
+ match = re.search(r'"relevance_score":\s*(\d+)', inner_txt)
|
|
|
+ if match:
|
|
|
+ temp_parts.append(f"RELEVANTSUSE SKOOR: {match.group(1)}/10")
|
|
|
+
|
|
|
+ if temp_parts:
|
|
|
+ #print(f"✅ Regex extracted {len(temp_parts)} parts")
|
|
|
+ return "\n\n".join(temp_parts)
|
|
|
+
|
|
|
+ # Nüüd kasuta parsed dict'i
|
|
|
+ if parsed.get("theoretical_contribution"):
|
|
|
+ formatted_parts.append(
|
|
|
+ "TEOREETILINE PANUS:\n" + str(parsed["theoretical_contribution"])
|
|
|
+ )
|
|
|
+ if parsed.get("practical_applicability"):
|
|
|
+ formatted_parts.append(
|
|
|
+ "PRAKTILINE RAKENDATAVUS:\n" + str(parsed["practical_applicability"])
|
|
|
+ )
|
|
|
+ if parsed.get("problem_solving"):
|
|
|
+ formatted_parts.append(
|
|
|
+ "PROBLEEMILAHENDUS:\n" + str(parsed["problem_solving"])
|
|
|
+ )
|
|
|
+ if parsed.get("limitations"):
|
|
|
+ formatted_parts.append(
|
|
|
+ "PIIRANGUD:\n" + str(parsed["limitations"])
|
|
|
+ )
|
|
|
+ if "relevance_score" in parsed:
|
|
|
+ formatted_parts.append(
|
|
|
+ f"RELEVANTSUSE SKOOR: {parsed['relevance_score']}/10"
|
|
|
+ )
|
|
|
+
|
|
|
+ return "\n\n".join(formatted_parts) if formatted_parts else None
|
|
|
+
|
|
|
+ # Kui JSON ei õnnestunud → kasuta regex-i
|
|
|
+ print("⚠️ Using regex fallback")
|
|
|
+
|
|
|
+ # Regex peab nüüd käsitlema newline't – kasuta re.DOTALL
|
|
|
+ match = re.search(r'"theoretical_contribution":\s*"(.*?)"(?=\s*,|\s*})', txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ content = match.group(1).replace('\\n', '\n').replace('\\"', '"')
|
|
|
+ if content:
|
|
|
+ formatted_parts.append(f"TEOREETILINE PANUS:\n{content}")
|
|
|
+
|
|
|
+ match = re.search(r'"practical_applicability":\s*"(.*?)"(?=\s*,|\s*})', txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ content = match.group(1).replace('\\n', '\n').replace('\\"', '"')
|
|
|
+ if content:
|
|
|
+ formatted_parts.append(f"PRAKTILINE RAKENDATAVUS:\n{content}")
|
|
|
+
|
|
|
+ match = re.search(r'"problem_solving":\s*"(.*?)"(?=\s*,|\s*})', txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ content = match.group(1).replace('\\n', '\n').replace('\\"', '"')
|
|
|
+ if content:
|
|
|
+ formatted_parts.append(f"PROBLEEMILAHENDUS:\n{content}")
|
|
|
+
|
|
|
+ match = re.search(r'"limitations":\s*"(.*?)"(?=\s*,|\s*})', txt, flags=re.DOTALL)
|
|
|
+ if match:
|
|
|
+ content = match.group(1).replace('\\n', '\n').replace('\\"', '"')
|
|
|
+ if content:
|
|
|
+ formatted_parts.append(f"PIIRANGUD:\n{content}")
|
|
|
+
|
|
|
+ match = re.search(r'"relevance_score":\s*(\d+(?:\.\d+)?)', txt)
|
|
|
+ if match:
|
|
|
+ score = match.group(1)
|
|
|
+ formatted_parts.append(f"RELEVANTSUSE SKOOR: {score}/10")
|
|
|
+
|
|
|
+ return "\n\n".join(formatted_parts) if formatted_parts else None
|
|
|
+
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# WEAVIATE ANDMEBAASIST PÄRING
|
|
|
+# ============================================================================
|
|
|
|
|
|
def get_all_articles_from_weaviate():
|
|
|
- """Toob kõik artiklid Weaviate'ist"""
|
|
|
+ """Toob kõik artiklid Weaviate andmebaasist"""
|
|
|
client = WeaviateClient()
|
|
|
articles = []
|
|
|
-
|
|
|
+
|
|
|
try:
|
|
|
collection = client.client.collections.get("ScientificArticle")
|
|
|
-
|
|
|
+
|
|
|
# Loendi kokku
|
|
|
count_response = collection.aggregate.over_all(total_count=True)
|
|
|
total = count_response.total_count
|
|
|
-
|
|
|
print(f"Weaviate'is leidsin {total} artiklit")
|
|
|
-
|
|
|
+
|
|
|
if total > 0:
|
|
|
# Toob kõik artiklid
|
|
|
response = collection.query.fetch_objects(limit=total)
|
|
|
-
|
|
|
+
|
|
|
for obj in response.objects:
|
|
|
try:
|
|
|
article = {
|
|
|
@@ -258,73 +381,35 @@ def get_all_articles_from_weaviate():
|
|
|
'summary_et': clean_markdown_for_pdf(obj.properties.get('summary_et', '')),
|
|
|
'key_concepts': [clean_markdown_for_pdf(c) for c in obj.properties.get('key_concepts', [])],
|
|
|
'methods_used': [clean_markdown_for_pdf(m) for m in obj.properties.get('methods_used', [])],
|
|
|
- 'transport_context': parse_transport_context(obj.properties.get('transport_context', {})),
|
|
|
+ 'transport_context': obj.properties.get('transport_context', {}),
|
|
|
'relevance_score': obj.properties.get('relevance_score', 'N/A'),
|
|
|
'processing_date': obj.properties.get('processing_date', ''),
|
|
|
'source_file': obj.properties.get('source_file', '')
|
|
|
}
|
|
|
articles.append(article)
|
|
|
+
|
|
|
except Exception as e:
|
|
|
- print(f" Viga artikli {obj.properties.get('article_id', 'unknown')} töötlemisel: {e}")
|
|
|
- # Lisa artikel ilma puhastuseta
|
|
|
- article = {
|
|
|
- 'article_id': obj.properties.get('article_id', 'N/A'),
|
|
|
- 'title': str(obj.properties.get('title', 'N/A')),
|
|
|
- 'authors': obj.properties.get('authors', []),
|
|
|
- 'year': obj.properties.get('year', 'N/A'),
|
|
|
- 'journal': str(obj.properties.get('journal', 'N/A')),
|
|
|
- 'doi': obj.properties.get('doi', ''),
|
|
|
- 'abstract_en': str(obj.properties.get('abstract_en', '')),
|
|
|
- 'summary_et': str(obj.properties.get('summary_et', '')),
|
|
|
- 'key_concepts': [str(c) for c in obj.properties.get('key_concepts', [])],
|
|
|
- 'methods_used': [str(m) for m in obj.properties.get('methods_used', [])],
|
|
|
- 'transport_context': str(obj.properties.get('transport_context', {})),
|
|
|
- 'relevance_score': obj.properties.get('relevance_score', 'N/A'),
|
|
|
- 'processing_date': obj.properties.get('processing_date', ''),
|
|
|
- 'source_file': obj.properties.get('source_file', '')
|
|
|
- }
|
|
|
- articles.append(article)
|
|
|
-
|
|
|
+ print(f"⚠️ Viga artikli {obj.properties.get('article_id', 'unknown')} töötlemisel: {e}")
|
|
|
+ # Jätka järgmise artikliga
|
|
|
+ continue
|
|
|
+
|
|
|
except Exception as e:
|
|
|
- print(f"Viga artiklite toomisel: {e}")
|
|
|
- import traceback
|
|
|
+ print(f"❌ Viga artiklite toomisel: {e}")
|
|
|
traceback.print_exc()
|
|
|
+
|
|
|
finally:
|
|
|
client.close()
|
|
|
-
|
|
|
+
|
|
|
return articles
|
|
|
|
|
|
-def format_summary_for_pdf(summary):
|
|
|
- """Vorminda kokkuvõte PDF-ile sobivaks"""
|
|
|
- if not summary:
|
|
|
- return ""
|
|
|
-
|
|
|
- # Kui ei ole string, konverteeri
|
|
|
- if not isinstance(summary, str):
|
|
|
- summary = str(summary)
|
|
|
-
|
|
|
- # Eemalda kõik vormindus ja tee lihtsaks tekstiks
|
|
|
- summary = clean_markdown_for_pdf(summary)
|
|
|
-
|
|
|
- # Lisa uued read peamiste sektsioonide ette
|
|
|
- summary = summary.replace('1. ARTIKLI PEAMISED PUNKTID:', '\n1. ARTIKLI PEAMISED PUNKTID:\n')
|
|
|
- summary = summary.replace('2. KASUTATUD MEETODID:', '\n\n2. KASUTATUD MEETODID:\n')
|
|
|
- summary = summary.replace('3. PEAMISED TULEMUSED:', '\n\n3. PEAMISED TULEMUSED:\n')
|
|
|
- summary = summary.replace('4. JÄRELDUSED JA SOOVITUSED:', '\n\n4. JÄRELDUSED JA SOOVITUSED:\n')
|
|
|
- summary = summary.replace('5. TRANSFORDIPLANEERIMISE KONTEKST:', '\n\n5. TRANSFORDIPLANEERIMISE KONTEKST:\n')
|
|
|
-
|
|
|
- # Asenda liigsed reavahetused
|
|
|
- summary = re.sub(r'\n{3,}', '\n\n', summary)
|
|
|
-
|
|
|
- # Lõika liiga pikk tekst
|
|
|
- if len(summary) > 4000:
|
|
|
- summary = summary[:4000] + "... [kokkuvõte lõigatud, liiga pikk]"
|
|
|
-
|
|
|
- return summary
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# PDF GENEREERIMINE
|
|
|
+# ============================================================================
|
|
|
|
|
|
def create_pdf_from_articles(articles, output_filename):
|
|
|
"""Loob PDF faili artiklitest"""
|
|
|
-
|
|
|
+
|
|
|
# Loo PDF dokument
|
|
|
doc = SimpleDocTemplate(
|
|
|
output_filename,
|
|
|
@@ -334,10 +419,10 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
topMargin=72,
|
|
|
bottomMargin=72
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
# Stiilide loomine
|
|
|
styles = getSampleStyleSheet()
|
|
|
-
|
|
|
+
|
|
|
# Kohandatud stiilid
|
|
|
title_style = ParagraphStyle(
|
|
|
'CustomTitle',
|
|
|
@@ -347,7 +432,7 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
textColor=colors.HexColor('#2c3e50'),
|
|
|
alignment=TA_LEFT
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
subtitle_style = ParagraphStyle(
|
|
|
'CustomSubtitle',
|
|
|
parent=styles['Heading2'],
|
|
|
@@ -356,7 +441,7 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
textColor=colors.HexColor('#34495e'),
|
|
|
alignment=TA_LEFT
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
section_style = ParagraphStyle(
|
|
|
'CustomSection',
|
|
|
parent=styles['Heading3'],
|
|
|
@@ -366,16 +451,16 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
textColor=colors.HexColor('#7f8c8d'),
|
|
|
alignment=TA_LEFT
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
normal_style = ParagraphStyle(
|
|
|
'CustomNormal',
|
|
|
parent=styles['Normal'],
|
|
|
fontSize=10,
|
|
|
spaceAfter=6,
|
|
|
alignment=TA_JUSTIFY,
|
|
|
- leading=14 # Reavahe
|
|
|
+ leading=14
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
metadata_style = ParagraphStyle(
|
|
|
'CustomMetadata',
|
|
|
parent=styles['Normal'],
|
|
|
@@ -384,44 +469,45 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
textColor=colors.HexColor('#5d6d7e'),
|
|
|
alignment=TA_LEFT
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
# Elementide kogumine
|
|
|
elements = []
|
|
|
-
|
|
|
+
|
|
|
# Pealkiri ja kokkuvõte
|
|
|
elements.append(Paragraph("TEADUSARTIKLITE ANDMEBAAS", title_style))
|
|
|
elements.append(Spacer(1, 12))
|
|
|
-
|
|
|
+
|
|
|
today = datetime.now().strftime("%d.%m.%Y %H:%M")
|
|
|
elements.append(Paragraph(f"Eksporditud: {today}", metadata_style))
|
|
|
elements.append(Paragraph(f"Artikleid kokku: {len(articles)}", metadata_style))
|
|
|
elements.append(Spacer(1, 24))
|
|
|
-
|
|
|
+
|
|
|
+ # ========================================================================
|
|
|
# Iga artikli jaoks
|
|
|
+ # ========================================================================
|
|
|
+
|
|
|
for i, article in enumerate(articles):
|
|
|
- # Artikli pealkiri
|
|
|
+
|
|
|
+ # ARTIKLI PEALKIRI
|
|
|
elements.append(Paragraph(f"{i+1}. {article['title']}", title_style))
|
|
|
-
|
|
|
- # Autorid
|
|
|
+ print(f"✅ {i+1}. {article['title']}")
|
|
|
+
|
|
|
+ # AUTORID
|
|
|
if article['authors']:
|
|
|
authors_text = ", ".join(article['authors'])
|
|
|
- elements.append(Paragraph(f"<b>Autorid:</b> {authors_text}", subtitle_style))
|
|
|
-
|
|
|
- # Metaandmed tabelina
|
|
|
+ elements.append(Paragraph(f"Autorid: {authors_text}", subtitle_style))
|
|
|
+
|
|
|
+ # METAANDMED (aasta, žurnaal, DOI, relevantsus)
|
|
|
metadata_data = []
|
|
|
-
|
|
|
if article['year'] and article['year'] != 'N/A':
|
|
|
metadata_data.append(['Aasta:', str(article['year'])])
|
|
|
-
|
|
|
if article['journal'] and article['journal'] != 'N/A':
|
|
|
metadata_data.append(['Žurnaal:', article['journal']])
|
|
|
-
|
|
|
if article['doi']:
|
|
|
metadata_data.append(['DOI:', article['doi']])
|
|
|
-
|
|
|
if article['relevance_score'] and article['relevance_score'] != 'N/A':
|
|
|
metadata_data.append(['Relevantsus:', f"{article['relevance_score']}/10"])
|
|
|
-
|
|
|
+
|
|
|
if metadata_data:
|
|
|
metadata_table = Table(metadata_data, colWidths=[2*cm, 12*cm])
|
|
|
metadata_table.setStyle(TableStyle([
|
|
|
@@ -434,18 +520,16 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
]))
|
|
|
elements.append(metadata_table)
|
|
|
elements.append(Spacer(1, 12))
|
|
|
-
|
|
|
- # Võtmesõnad ja meetodid
|
|
|
+
|
|
|
+ # VÕTMESÕNAD JA MEETODID
|
|
|
tags_data = []
|
|
|
-
|
|
|
if article['key_concepts']:
|
|
|
- concepts_text = ", ".join(article['key_concepts'][:10]) # Piirangu 10 mõistele
|
|
|
+ concepts_text = ", ".join(article['key_concepts'][:10]) # Piirang: 10 mõiste
|
|
|
tags_data.append(['Võtmesõnad:', concepts_text])
|
|
|
-
|
|
|
if article['methods_used']:
|
|
|
methods_text = ", ".join(article['methods_used'])
|
|
|
tags_data.append(['Meetodid:', methods_text])
|
|
|
-
|
|
|
+
|
|
|
if tags_data:
|
|
|
tags_table = Table(tags_data, colWidths=[2*cm, 12*cm])
|
|
|
tags_table.setStyle(TableStyle([
|
|
|
@@ -459,165 +543,154 @@ def create_pdf_from_articles(articles, output_filename):
|
|
|
]))
|
|
|
elements.append(tags_table)
|
|
|
elements.append(Spacer(1, 12))
|
|
|
-
|
|
|
- # Abstrakt
|
|
|
+
|
|
|
+ # ABSTRAKT (inglise keeles)
|
|
|
if article['abstract_en']:
|
|
|
- elements.append(Paragraph("<b>ABSTRAKT (inglise keeles):</b>", section_style))
|
|
|
+ elements.append(Paragraph("ABSTRAKT (inglise keeles):", section_style))
|
|
|
abstract_text = article['abstract_en']
|
|
|
if len(abstract_text) > 800:
|
|
|
abstract_text = abstract_text[:800] + "..."
|
|
|
elements.append(Paragraph(abstract_text, normal_style))
|
|
|
elements.append(Spacer(1, 12))
|
|
|
-
|
|
|
- # Kokkuvõte
|
|
|
+
|
|
|
+ # KOKKUVÕTE (eesti keeles)
|
|
|
if article['summary_et']:
|
|
|
- elements.append(Paragraph("<b>KOKKUVÕTE (eesti keeles):</b>", section_style))
|
|
|
-
|
|
|
- # Formateeri kokkuvõte PDF-ile
|
|
|
+ elements.append(Paragraph("KOKKUVÕTE (eesti keeles):", section_style))
|
|
|
summary = format_summary_for_pdf(article['summary_et'])
|
|
|
-
|
|
|
- # Kasuta lihtsat tekstiparagraphi
|
|
|
elements.append(Paragraph(summary, normal_style))
|
|
|
elements.append(Spacer(1, 12))
|
|
|
-
|
|
|
- # Transpordi kontekst
|
|
|
+
|
|
|
+ # ====================================================================
|
|
|
+ # TRANSPORDI PLANEERIMISE KONTEKST
|
|
|
+ # ====================================================================
|
|
|
+
|
|
|
if article['transport_context']:
|
|
|
- # Debugimiseks
|
|
|
- debugger_data = str(article['transport_context'])
|
|
|
- print("----------- \"" + article['title'] + "\" -----------")
|
|
|
- print("----------- article['transport_context'] -------------")
|
|
|
- print(debugger_data)
|
|
|
- elements.append(Paragraph("<b>TRANSFORDIPLANEERIMISE KONTEKST:</b>", section_style))
|
|
|
- context_text = format_context_for_pdf(article['transport_context']['relevance_score'])
|
|
|
+ elements.append(Paragraph("TRANSPORDIPLANEERIMISE KONTEKST:", section_style))
|
|
|
+
|
|
|
+ # DEBUG-RIDA – prindi objekt üks-ühele konsooli
|
|
|
+ #print("DEBUG transport_context:", article['article_id'], article['transport_context'])
|
|
|
+
|
|
|
+ context_text = format_transport_context(article['transport_context'])
|
|
|
+ #print(f"DEBUG context_text returned: {context_text}") # <-- LISA SEE RIDA
|
|
|
+
|
|
|
if context_text:
|
|
|
- elements.append(Paragraph("RELEVANTSUSE SKOOR: " + context_text, normal_style))
|
|
|
- elements.append(Spacer(1, 1))
|
|
|
-
|
|
|
- elements.append(Paragraph("<b>ANALÜÜS:</b>", normal_style))
|
|
|
- analysis_text = article['transport_context']['analysis']
|
|
|
- # Proovi leida theoretical_contribution regex'iga
|
|
|
- match = re.search(r'"theoretical_contribution":\s*"([^"]*(?:\\"[^"]*)*)"', analysis_text)
|
|
|
- if match:
|
|
|
- context_text = match.group(1)
|
|
|
- if context_text:
|
|
|
- elements.append(Paragraph("<b>TEOREETILINE PANUS:</b>", normal_style))
|
|
|
- elements.append(Paragraph(context_text, normal_style))
|
|
|
- # Proovi leida practical_applicability regex'iga
|
|
|
- match = re.search(r'"practical_applicability":\s*"([^"]*(?:\\"[^"]*)*)"', analysis_text)
|
|
|
- if match:
|
|
|
- context_text = match.group(1)
|
|
|
- if context_text:
|
|
|
- elements.append(Paragraph("<b>PRAKTILINE RAKENDATAVUS:</b>", normal_style))
|
|
|
- elements.append(Paragraph(context_text, normal_style))
|
|
|
- # Proovi leida problem_solving regex'iga
|
|
|
- match = re.search(r'"problem_solving":\s*"([^"]*(?:\\"[^"]*)*)"', analysis_text)
|
|
|
- if match:
|
|
|
- context_text = match.group(1)
|
|
|
- if context_text:
|
|
|
- elements.append(Paragraph("<b>PROBLEEMILAHENDUS:</b>", normal_style))
|
|
|
- elements.append(Paragraph(context_text, normal_style))
|
|
|
- # Proovi leida limitations regex'iga
|
|
|
- match = re.search(r'"limitations":\s*"([^"]*(?:\\"[^"]*)*)"', analysis_text)
|
|
|
- if match:
|
|
|
- context_text = match.group(1)
|
|
|
- if context_text:
|
|
|
- elements.append(Paragraph("<b>PIIRANGUD:</b>", normal_style))
|
|
|
- elements.append(Paragraph(context_text, normal_style))
|
|
|
- # Proovi leida relevance_score regex'iga
|
|
|
- match = re.search(r'"relevance_score":\s*(\d+(?:\.\d+)?)', analysis_text)
|
|
|
- if match:
|
|
|
- context_text = match.group(1)
|
|
|
- if context_text:
|
|
|
- elements.append(Paragraph("<b>RELEVANTSUSE SKOOR:</b> " + context_text, normal_style))
|
|
|
-
|
|
|
- # Allikfail ja töötlemise info
|
|
|
+ # Jaga osadeks ja lisa eraldi paragrahvidena
|
|
|
+ parts = context_text.split('\n\n') # Jaga tühja reaga
|
|
|
+ for part in parts:
|
|
|
+ if part.strip():
|
|
|
+ # Asenda \n-märgid <br/> tag'idega
|
|
|
+ part_html = part.replace('\n', '<br/>')
|
|
|
+ try:
|
|
|
+ elements.append(Paragraph(part_html, normal_style))
|
|
|
+ elements.append(Spacer(1, 6))
|
|
|
+ except Exception as e:
|
|
|
+ print(f"❌ Failed to add part to PDF: {e}")
|
|
|
+ # Kui HTML tag ei tööta, proovi ilma
|
|
|
+ part_plain = part.replace('\n', ' ')
|
|
|
+ elements.append(Paragraph(part_plain, normal_style))
|
|
|
+ elements.append(Spacer(1, 6))
|
|
|
+
|
|
|
+ print("✅ Context added to PDF successfully")
|
|
|
+ else:
|
|
|
+ elements.append(Paragraph("Analüüsi andmed puuduvad", normal_style))
|
|
|
+
|
|
|
+ elements.append(Spacer(1, 12))
|
|
|
+
|
|
|
+
|
|
|
+ # ====================================================================
|
|
|
+ # FOOTER INFO (allikfail, töötlemise kuupäev)
|
|
|
+ # ====================================================================
|
|
|
+
|
|
|
footer_info = []
|
|
|
if article['source_file']:
|
|
|
source_name = os.path.basename(article['source_file'])
|
|
|
footer_info.append(f"Allikfail: {source_name}")
|
|
|
-
|
|
|
+
|
|
|
if article['processing_date']:
|
|
|
- # Proovi parsida kuupäeva
|
|
|
try:
|
|
|
- # Eemalda mikrosekundid kui on
|
|
|
date_str = article['processing_date']
|
|
|
if '.' in date_str:
|
|
|
date_str = date_str.split('.')[0]
|
|
|
date_str = date_str.replace('Z', '+00:00')
|
|
|
date_obj = datetime.fromisoformat(date_str)
|
|
|
footer_info.append(f"Töödeldud: {date_obj.strftime('%d.%m.%Y %H:%M')}")
|
|
|
- except Exception as e:
|
|
|
- # Kui ei õnnestu parsida, kuva algne string (lõigatud)
|
|
|
+ except Exception:
|
|
|
footer_info.append(f"Töödeldud: {article['processing_date'][:19]}")
|
|
|
-
|
|
|
+
|
|
|
if footer_info:
|
|
|
elements.append(Spacer(1, 6))
|
|
|
elements.append(Paragraph(" | ".join(footer_info), metadata_style))
|
|
|
-
|
|
|
+
|
|
|
# Lisa lehevahetus (välja arvatud viimase artikli puhul)
|
|
|
if i < len(articles) - 1:
|
|
|
elements.append(PageBreak())
|
|
|
else:
|
|
|
elements.append(Spacer(1, 24))
|
|
|
-
|
|
|
- # Lisa lõppinfo
|
|
|
+
|
|
|
+ # LÕPPINFO
|
|
|
elements.append(Paragraph("=" * 80, metadata_style))
|
|
|
elements.append(Spacer(1, 6))
|
|
|
elements.append(Paragraph(f"Kokku eksporditud artikleid: {len(articles)}", metadata_style))
|
|
|
elements.append(Paragraph("Eksporditud Weaviate teadusartiklite andmebaasist", metadata_style))
|
|
|
elements.append(Paragraph(f"PDF genereeritud: {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}", metadata_style))
|
|
|
-
|
|
|
+
|
|
|
# Koosta PDF
|
|
|
doc.build(elements)
|
|
|
-
|
|
|
+
|
|
|
return len(articles)
|
|
|
|
|
|
+
|
|
|
+# ============================================================================
|
|
|
+# PEAMINE FUNKTSIOON
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
def main():
|
|
|
- """Peamine funktsioon"""
|
|
|
+ """Peamine funktsioon - käivitab kogu protsessi"""
|
|
|
+
|
|
|
print("=" * 60)
|
|
|
print("ARTIKLITE EKSPORT PDF FAILI")
|
|
|
print("=" * 60)
|
|
|
-
|
|
|
+
|
|
|
# Toob artiklid Weaviate'ist
|
|
|
print("Toon artikleid Weaviate'ist...")
|
|
|
articles = get_all_articles_from_weaviate()
|
|
|
-
|
|
|
+
|
|
|
if not articles:
|
|
|
- print("Ei leidnud ühtegi artiklit Weaviate'is!")
|
|
|
+ print("❌ Ei leidnud ühtegi artiklit Weaviate'is!")
|
|
|
return
|
|
|
-
|
|
|
- print(f"Leidsin {len(articles)} artiklit")
|
|
|
-
|
|
|
+
|
|
|
+ print(f"✓ Leidsin {len(articles)} artiklit")
|
|
|
+
|
|
|
# Genereeri PDF failinimi
|
|
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
output_dir = "./data/exports"
|
|
|
os.makedirs(output_dir, exist_ok=True)
|
|
|
output_filename = os.path.join(output_dir, f"artiklid_eksport_{timestamp}.pdf")
|
|
|
-
|
|
|
+
|
|
|
# Loo PDF
|
|
|
print(f"Loon PDF faili: {output_filename}")
|
|
|
+
|
|
|
try:
|
|
|
article_count = create_pdf_from_articles(articles, output_filename)
|
|
|
-
|
|
|
+
|
|
|
print("=" * 60)
|
|
|
print(f"✅ VALMIS! Loodud PDF fail: {output_filename}")
|
|
|
- print(f" - Eksporditud artikleid: {article_count}")
|
|
|
- print(f" - Faili suurus: {os.path.getsize(output_filename) / 1024:.1f} KB")
|
|
|
+ print(f" - Eksporditud artikleid: {article_count}")
|
|
|
+ print(f" - Faili suurus: {os.path.getsize(output_filename) / 1024:.1f} KB")
|
|
|
print("=" * 60)
|
|
|
-
|
|
|
- # Näita esimese artikli pealkirja
|
|
|
+
|
|
|
if articles:
|
|
|
print("\nEsimesed artiklid:")
|
|
|
for i, article in enumerate(articles[:3]):
|
|
|
title_preview = article['title']
|
|
|
if len(title_preview) > 60:
|
|
|
title_preview = title_preview[:60] + "..."
|
|
|
- print(f" {i+1}. {title_preview}")
|
|
|
-
|
|
|
+ print(f" {i+1}. {title_preview}")
|
|
|
+
|
|
|
except Exception as e:
|
|
|
print(f"\n❌ VIGA PDF loomisel: {e}")
|
|
|
- import traceback
|
|
|
traceback.print_exc()
|
|
|
|
|
|
+
|
|
|
if __name__ == "__main__":
|
|
|
- main()
|
|
|
+ main()
|