|
|
@@ -0,0 +1,490 @@
|
|
|
+# save_articles_to_pdf.py
|
|
|
+
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import re
|
|
|
+from datetime import datetime
|
|
|
+from reportlab.lib.pagesizes import letter, A4
|
|
|
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
|
|
|
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
|
+from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER, TA_LEFT
|
|
|
+from reportlab.lib import colors
|
|
|
+from reportlab.lib.units import inch, cm
|
|
|
+from reportlab.pdfbase import pdfmetrics
|
|
|
+from reportlab.pdfbase.ttfonts import TTFont
|
|
|
+import json
|
|
|
+
|
|
|
+# Lisa src kaust Pythoni teele
|
|
|
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
+
|
|
|
+from src.weaviate_client import WeaviateClient
|
|
|
+
|
|
|
# Typographic characters mapped to plain ASCII so the standard PDF fonts
# render them predictably.
_TYPOGRAPHIC_REPLACEMENTS = str.maketrans({
    '\u00a0': ' ',    # non-breaking space
    '\u2026': '...',  # ellipsis
    '\u2013': '-',    # en dash
    '\u2014': '-',    # em dash
    '\u2018': "'",    # left single quote
    '\u2019': "'",    # right single quote
    '\u201c': '"',    # left double quote
    '\u201d': '"',    # right double quote
})


def clean_html_tags(text):
    """Strip HTML/XML markup from *text* and normalise it to plain text.

    Steps, in order:
      1. remove everything that looks like a tag (``<...>``);
      2. decode HTML character references (``&amp;``, ``&nbsp;``, ``&#39;`` ...);
      3. replace typographic punctuation (curly quotes, dashes, ellipsis,
         non-breaking space) with plain ASCII equivalents.

    Tags are removed *before* references are decoded, so escaped markup such
    as ``&lt;b&gt;`` survives as the literal text ``<b>``.

    Args:
        text: Text to clean. Non-string values are converted with ``str``.

    Returns:
        The cleaned text, or ``""`` when *text* is falsy. The result may
        contain literal ``<``, ``>`` and ``&`` characters, so it has to be
        escaped again before it is embedded in paragraph markup.
    """
    import html  # local import: nothing else in the module needs it

    if not text:
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = re.sub(r'<[^>]+>', '', text)
    # html.unescape covers every named and numeric reference, not just a
    # hand-maintained subset.
    text = html.unescape(text)
    return text.translate(_TYPOGRAPHIC_REPLACEMENTS)
|
|
|
+
|
|
|
def clean_markdown_for_pdf(text):
    """Reduce Markdown/HTML formatted *text* to a single line of plain text.

    HTML is removed via ``clean_html_tags``; Markdown headings, emphasis,
    code and links are then reduced to their visible text, list markers are
    normalised, and every run of whitespace (including newlines) is collapsed
    to one space.

    Args:
        text: Value to clean. Non-string values are converted with ``str``.

    Returns:
        The cleaned, stripped text, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""

    if not isinstance(text, str):
        text = str(text)

    text = clean_html_tags(text)

    # Drop fenced code blocks first, so markers inside code cannot pair up
    # with emphasis markers in the surrounding prose.
    text = re.sub(r'```[^`]+```', '', text)

    # Heading markers count only at the start of a line; an unanchored
    # pattern would also eat the '#' in text such as "C# code".
    text = re.sub(r'^[ \t]*#{1,6}\s+', '', text, flags=re.MULTILINE)

    # Emphasis markers: keep the wrapped text only.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)  # **bold**
    text = re.sub(r'\*(.+?)\*', r'\1', text)      # *italic*
    text = re.sub(r'__(.+?)__', r'\1', text)      # __underline__
    text = re.sub(r'~~(.+?)~~', r'\1', text)      # ~~strikethrough~~

    # List markers: bullets become "• ", numbered items keep their number.
    text = re.sub(r'^\s*[-*+]\s+', '• ', text, flags=re.MULTILINE)
    text = re.sub(r'^\s*(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)

    # Inline code is shown in square brackets.
    text = re.sub(r'`([^`]+)`', r'[\1]', text)

    # Links and images collapse to their label / alt text.
    text = re.sub(r'!?\[([^\]]+)\]\([^\)]+\)', r'\1', text)

    # Collapse all whitespace, including line breaks.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
|
|
|
+
|
|
|
def parse_transport_context(context_data):
    """Normalise a stored transport-context value for display.

    Args:
        context_data: Either a dict, a string (JSON or free text), or any
            other value.

    Returns:
        * dict input, or a string holding a JSON object: a new dict in which
          every string value has been passed through
          ``clean_markdown_for_pdf``; other values are kept unchanged;
        * a string holding any other valid JSON value: the decoded value;
        * a string that is not valid JSON: ``{"raw_analysis": <cleaned text>}``;
        * anything else: returned unchanged.
    """
    if isinstance(context_data, str):
        try:
            decoded = json.loads(context_data)
        except json.JSONDecodeError:
            # Not JSON at all: treat the whole string as free-text analysis.
            return {"raw_analysis": clean_markdown_for_pdf(context_data)}
        if not isinstance(decoded, dict):
            return decoded
        # A JSON-encoded object gets the same cleaning as a real dict.
        context_data = decoded

    if isinstance(context_data, dict):
        return {
            key: clean_markdown_for_pdf(value) if isinstance(value, str) else value
            for key, value in context_data.items()
        }

    return context_data
|
|
|
+
|
|
|
# (key, label) pairs in the order the sections appear in the PDF.
_CONTEXT_SECTIONS = (
    ('theoretical_contribution', 'TEOREETILINE PANUS'),
    ('practical_applicability', 'PRAKTILINE RAKENDATAVUS'),
    ('problem_solving', 'PROBLEEMILAHENDUS'),
    ('limitations', 'PIIRANGUD'),
    ('relevance_score', 'RELEVANTSUSE SKOOR'),
    ('analysis', 'ANALÜÜS'),
    ('raw_analysis', 'ANALÜÜS'),
)


def format_context_for_pdf(parsed_context):
    """Render a parsed transport context as labelled text sections.

    Args:
        parsed_context: Usually the dict produced by
            ``parse_transport_context``; any other value is stringified and
            cleaned with ``clean_markdown_for_pdf``.

    Returns:
        For a dict: the non-empty known sections as ``"LABEL: value"``,
        joined by blank lines (``""`` when nothing applies). The relevance
        score is rendered as ``"<score>/10"`` and is kept when it is ``0``,
        but skipped when it is ``None`` or an empty string.
        For any other value: the cleaned string form of that value.
    """
    if not isinstance(parsed_context, dict):
        return clean_markdown_for_pdf(str(parsed_context))

    sections = []
    for key, label in _CONTEXT_SECTIONS:
        value = parsed_context.get(key)
        if key == 'relevance_score':
            # 0 is a legitimate score; only a missing value is skipped.
            if value is not None and value != '':
                sections.append(f"{label}: {value}/10")
        elif value:
            sections.append(f"{label}: {value}")

    return "\n\n".join(sections)
|
|
|
+
|
|
|
def _plain_text(value):
    """Stringify *value* without any cleaning; ``None`` becomes ``""``."""
    return "" if value is None else str(value)


def _article_from_properties(props, clean_text, parse_context):
    """Map one object's property dict to the article dict used by the export.

    ``clean_text`` is applied to the free-text fields and to every list item;
    ``parse_context`` is applied to the ``transport_context`` property.
    List properties stored as ``None`` are treated as empty lists.
    """
    return {
        'article_id': props.get('article_id', 'N/A'),
        'title': clean_text(props.get('title', 'N/A')),
        'authors': props.get('authors') or [],
        'year': props.get('year', 'N/A'),
        'journal': clean_text(props.get('journal', 'N/A')),
        'doi': props.get('doi', ''),
        'abstract_en': clean_text(props.get('abstract_en', '')),
        'summary_et': clean_text(props.get('summary_et', '')),
        'key_concepts': [clean_text(c) for c in (props.get('key_concepts') or [])],
        'methods_used': [clean_text(m) for m in (props.get('methods_used') or [])],
        'transport_context': parse_context(props.get('transport_context', {})),
        'relevance_score': props.get('relevance_score', 'N/A'),
        'processing_date': props.get('processing_date', ''),
        'source_file': props.get('source_file', ''),
    }


def get_all_articles_from_weaviate():
    """Fetch every object of the ``ScientificArticle`` collection.

    Each object is converted to an article dict with cleaned text fields.
    If cleaning one object fails, the error is printed and the object is
    added with plainly stringified fields instead, so a single bad record
    does not abort the export.

    Returns:
        A list of article dicts. Errors while talking to the database are
        printed (with traceback) and whatever was collected so far is
        returned; the list is empty when nothing could be read.
    """
    client = WeaviateClient()
    articles = []

    try:
        collection = client.client.collections.get("ScientificArticle")

        total = collection.aggregate.over_all(total_count=True).total_count
        print(f"Weaviate'is leidsin {total} artiklit")

        if total > 0:
            # The cursor-based iterator pages through the whole collection,
            # so the export is not capped by the per-query result limit.
            for obj in collection.iterator():
                props = obj.properties
                try:
                    article = _article_from_properties(
                        props, clean_markdown_for_pdf, parse_transport_context
                    )
                except Exception as e:
                    print(f" Viga artikli {props.get('article_id', 'unknown')} töötlemisel: {e}")
                    # Fallback: keep the record, just without cleaning.
                    article = _article_from_properties(props, _plain_text, _plain_text)
                articles.append(article)

    except Exception as e:
        print(f"Viga artiklite toomisel: {e}")
        import traceback
        traceback.print_exc()
    finally:
        client.close()

    return articles
|
|
|
+
|
|
|
# Section headings expected in the generated summaries. The last heading is
# listed in both the misspelled form the original code matched and the
# correctly spelled form, so either variant gets its own paragraph.
_SUMMARY_HEADINGS = (
    '1. ARTIKLI PEAMISED PUNKTID:',
    '2. KASUTATUD MEETODID:',
    '3. PEAMISED TULEMUSED:',
    '4. JÄRELDUSED JA SOOVITUSED:',
    '5. TRANSFORDIPLANEERIMISE KONTEKST:',
    '5. TRANSPORDIPLANEERIMISE KONTEKST:',
)

# Summaries longer than this many characters are cut off.
_SUMMARY_MAX_CHARS = 4000


def format_summary_for_pdf(summary):
    """Turn a stored summary into plain text with one paragraph per section.

    The text is cleaned with ``clean_markdown_for_pdf`` (which also flattens
    it to one line), then a blank line is inserted before each known section
    heading and a line break after it.

    Args:
        summary: Summary text; falsy values yield ``""``.

    Returns:
        The formatted summary without leading/trailing whitespace. Text
        longer than 4000 characters is truncated and marked as such.
    """
    if not summary:
        return ""

    # clean_markdown_for_pdf converts non-string input itself.
    summary = clean_markdown_for_pdf(summary)

    for heading in _SUMMARY_HEADINGS:
        summary = summary.replace(heading, f"\n\n{heading}\n")

    # Never keep more than one blank line, and no stray edge whitespace.
    summary = re.sub(r'\n{3,}', '\n\n', summary).strip()

    if len(summary) > _SUMMARY_MAX_CHARS:
        summary = summary[:_SUMMARY_MAX_CHARS] + "... [kokkuvõte lõigatud, liiga pikk]"

    return summary
|
|
|
+
|
|
|
def _pdf_markup(text):
    """Escape plain text for paragraph markup and keep its line breaks."""
    from xml.sax.saxutils import escape  # local import: only needed here

    # '&', '<' and '>' would otherwise be read as markup by Paragraph, and
    # bare newlines would be collapsed into spaces.
    return escape(str(text)).replace("\n", "<br/>")


def _format_processing_date(value):
    """Render a processing timestamp as ``dd.mm.YYYY HH:MM``.

    Accepts a ``datetime`` or an ISO-8601 string; a string that cannot be
    parsed is returned cut to its first 19 characters.
    """
    if isinstance(value, datetime):
        return value.strftime('%d.%m.%Y %H:%M')

    raw = str(value)
    try:
        # Drop fractional seconds (and anything after them), then make a
        # trailing 'Z' acceptable to fromisoformat.
        trimmed = raw.split('.')[0].replace('Z', '+00:00')
        return datetime.fromisoformat(trimmed).strftime('%d.%m.%Y %H:%M')
    except ValueError:
        return raw[:19]


def _build_pdf_styles():
    """Create the paragraph styles used by the export, keyed by role."""
    base = getSampleStyleSheet()
    return {
        'title': ParagraphStyle(
            'CustomTitle', parent=base['Heading1'], fontSize=14, spaceAfter=12,
            textColor=colors.HexColor('#2c3e50'), alignment=TA_LEFT,
        ),
        'subtitle': ParagraphStyle(
            'CustomSubtitle', parent=base['Heading2'], fontSize=12, spaceAfter=6,
            textColor=colors.HexColor('#34495e'), alignment=TA_LEFT,
        ),
        'section': ParagraphStyle(
            'CustomSection', parent=base['Heading3'], fontSize=11, spaceAfter=6,
            spaceBefore=12, textColor=colors.HexColor('#7f8c8d'), alignment=TA_LEFT,
        ),
        'normal': ParagraphStyle(
            'CustomNormal', parent=base['Normal'], fontSize=10, spaceAfter=6,
            alignment=TA_JUSTIFY, leading=14,
        ),
        'metadata': ParagraphStyle(
            'CustomMetadata', parent=base['Normal'], fontSize=9, spaceAfter=3,
            textColor=colors.HexColor('#5d6d7e'), alignment=TA_LEFT,
        ),
        # Used for table values so long text wraps inside its column.
        'cell': ParagraphStyle(
            'CustomCell', parent=base['Normal'], fontName='Helvetica', fontSize=9,
            leading=11, alignment=TA_LEFT,
        ),
    }


def _label_value_table(rows, value_style, padding, label_color=None):
    """Build a two-column label/value table from ``(label, value)`` rows.

    Values are wrapped in Paragraphs because plain string cells never wrap.
    """
    data = [[label, Paragraph(_pdf_markup(value), value_style)] for label, value in rows]
    commands = [
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('BOTTOMPADDING', (0, 0), (-1, -1), padding),
        ('TOPPADDING', (0, 0), (-1, -1), padding),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('LEFTPADDING', (0, 0), (0, -1), 0),
    ]
    if label_color is not None:
        commands.append(('TEXTCOLOR', (0, 0), (0, -1), label_color))

    table = Table(data, colWidths=[2*cm, 12*cm])
    table.setStyle(TableStyle(commands))
    return table


def create_pdf_from_articles(articles, output_filename):
    """Write *articles* to an A4 PDF, one article per page.

    Args:
        articles: List of article dicts as produced by
            ``get_all_articles_from_weaviate``. Missing or empty fields are
            skipped.
        output_filename: Path of the PDF file to create.

    Returns:
        The number of articles written.
    """
    doc = SimpleDocTemplate(
        output_filename,
        pagesize=A4,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72,
    )
    styles = _build_pdf_styles()
    elements = []

    # Document header
    elements.append(Paragraph("TEADUSARTIKLITE ANDMEBAAS", styles['title']))
    elements.append(Spacer(1, 12))
    today = datetime.now().strftime("%d.%m.%Y %H:%M")
    elements.append(Paragraph(f"Eksporditud: {today}", styles['metadata']))
    elements.append(Paragraph(f"Artikleid kokku: {len(articles)}", styles['metadata']))
    elements.append(Spacer(1, 24))

    for number, article in enumerate(articles, start=1):
        # Title
        title = _pdf_markup(article.get('title', ''))
        elements.append(Paragraph(f"{number}. {title}", styles['title']))

        # Authors: accept a single string as well as a list of names.
        authors = article.get('authors')
        if authors:
            if isinstance(authors, str):
                authors = [authors]
            authors_text = _pdf_markup(", ".join(str(a) for a in authors))
            elements.append(Paragraph(f"<b>Autorid:</b> {authors_text}", styles['subtitle']))

        # Bibliographic metadata
        metadata_rows = []
        year = article.get('year')
        if year and year != 'N/A':
            metadata_rows.append(('Aasta:', str(year)))
        journal = article.get('journal')
        if journal and journal != 'N/A':
            metadata_rows.append(('Žurnaal:', journal))
        if article.get('doi'):
            metadata_rows.append(('DOI:', article['doi']))
        score = article.get('relevance_score')
        # A score of 0 is still a score; only missing values are skipped.
        if score is not None and score != '' and score != 'N/A':
            metadata_rows.append(('Relevantsus:', f"{score}/10"))

        if metadata_rows:
            elements.append(_label_value_table(metadata_rows, styles['cell'], padding=6))
            elements.append(Spacer(1, 12))

        # Key concepts (at most 10) and methods
        tag_rows = []
        if article.get('key_concepts'):
            concepts = ", ".join(str(c) for c in article['key_concepts'][:10])
            tag_rows.append(('Võtmesõnad:', concepts))
        if article.get('methods_used'):
            methods = ", ".join(str(m) for m in article['methods_used'])
            tag_rows.append(('Meetodid:', methods))

        if tag_rows:
            elements.append(_label_value_table(
                tag_rows, styles['cell'], padding=4,
                label_color=colors.HexColor('#5d6d7e'),
            ))
            elements.append(Spacer(1, 12))

        # Abstract, shortened to 800 characters before escaping
        abstract_text = article.get('abstract_en')
        if abstract_text:
            elements.append(Paragraph("<b>ABSTRAKT (inglise keeles):</b>", styles['section']))
            if len(abstract_text) > 800:
                abstract_text = abstract_text[:800] + "..."
            elements.append(Paragraph(_pdf_markup(abstract_text), styles['normal']))
            elements.append(Spacer(1, 12))

        # Summary
        if article.get('summary_et'):
            elements.append(Paragraph("<b>KOKKUVÕTE (eesti keeles):</b>", styles['section']))
            summary = format_summary_for_pdf(article['summary_et'])
            elements.append(Paragraph(_pdf_markup(summary), styles['normal']))
            elements.append(Spacer(1, 12))

        # Transport context: the heading is emitted only when there is text.
        if article.get('transport_context'):
            context_text = format_context_for_pdf(article['transport_context'])
            if context_text:
                elements.append(Paragraph("<b>TRANSFORDIPLANEERIMISE KONTEKST:</b>", styles['section']))
                elements.append(Paragraph(_pdf_markup(context_text), styles['normal']))
                elements.append(Spacer(1, 12))

        # Source file and processing date
        footer_info = []
        if article.get('source_file'):
            source_name = os.path.basename(article['source_file'])
            footer_info.append(f"Allikfail: {source_name}")
        if article.get('processing_date'):
            footer_info.append(f"Töödeldud: {_format_processing_date(article['processing_date'])}")

        if footer_info:
            elements.append(Spacer(1, 6))
            elements.append(Paragraph(_pdf_markup(" | ".join(footer_info)), styles['metadata']))

        # Page break between articles, extra space after the last one.
        if number < len(articles):
            elements.append(PageBreak())
        else:
            elements.append(Spacer(1, 24))

    # Closing block
    elements.append(Paragraph("=" * 80, styles['metadata']))
    elements.append(Spacer(1, 6))
    elements.append(Paragraph(f"Kokku eksporditud artikleid: {len(articles)}", styles['metadata']))
    elements.append(Paragraph("Eksporditud Weaviate teadusartiklite andmebaasist", styles['metadata']))
    elements.append(Paragraph(
        f"PDF genereeritud: {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}",
        styles['metadata'],
    ))

    doc.build(elements)

    return len(articles)
|
|
|
+
|
|
|
def main():
    """Export all articles from Weaviate into a timestamped PDF file.

    The file is written to ``./data/exports`` (created when missing),
    relative to the current working directory.

    Returns:
        ``0`` when a PDF was created, ``1`` when no articles were found or
        the PDF could not be built. The value is used as the process exit
        status when the module runs as a script.
    """
    banner = "=" * 60
    print(banner)
    print("ARTIKLITE EKSPORT PDF FAILI")
    print(banner)

    print("Toon artikleid Weaviate'ist...")
    articles = get_all_articles_from_weaviate()

    if not articles:
        print("Ei leidnud ühtegi artiklit Weaviate'is!")
        return 1

    print(f"Leidsin {len(articles)} artiklit")

    # Timestamped file name, so repeated exports never overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = "./data/exports"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = os.path.join(output_dir, f"artiklid_eksport_{timestamp}.pdf")

    print(f"Loon PDF faili: {output_filename}")
    try:
        article_count = create_pdf_from_articles(articles, output_filename)
        size_kb = os.path.getsize(output_filename) / 1024
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the caller.
        print(f"\n❌ VIGA PDF loomisel: {e}")
        import traceback
        traceback.print_exc()
        return 1

    print(banner)
    print(f"✅ VALMIS! Loodud PDF fail: {output_filename}")
    print(f" - Eksporditud artikleid: {article_count}")
    print(f" - Faili suurus: {size_kb:.1f} KB")
    print(banner)

    # Preview the titles of the first three articles.
    print("\nEsimesed artiklid:")
    for i, article in enumerate(articles[:3]):
        title_preview = article['title']
        if len(title_preview) > 60:
            title_preview = title_preview[:60] + "..."
        print(f" {i+1}. {title_preview}")

    return 0


if __name__ == "__main__":
    sys.exit(main())
|