| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623 |
- # save_articles_to_pdf.py
- import os
- import sys
- import re
- from datetime import datetime
- from reportlab.lib.pagesizes import letter, A4
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
- from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER, TA_LEFT
- from reportlab.lib import colors
- from reportlab.lib.units import inch, cm
- from reportlab.pdfbase import pdfmetrics
- from reportlab.pdfbase.ttfonts import TTFont
- import json
- # Lisa src kaust Pythoni teele
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
- from src.weaviate_client import WeaviateClient
def clean_html_tags(text):
    """Strip HTML/XML markup from ``text`` and reduce it to plain text.

    Processing order:

    1. Tag-shaped markup is removed: elements (``<b>``, ``</b>``, ``<br/>``),
       comments, ``<!DOCTYPE ...>`` and ``<?xml ...?>``.  A ``<`` that is not
       followed by a tag name (``p < 0.05``) is left alone.
    2. Character references (``&amp;``, ``&lt;``, ``&nbsp;``, ``&#39;`` ...)
       are decoded in a single pass, so ``&amp;lt;`` becomes ``&lt;`` rather
       than ``<``.
    3. Typographic punctuation is replaced by ASCII equivalents.

    Args:
        text: Source string.  Any falsy value (``None``, ``""``) yields ``""``.

    Returns:
        The cleaned text.  It may contain literal ``&``, ``<`` and ``>``
        characters produced by step 2.
    """
    import html  # local import: keeps the file's import block untouched

    if not text:
        return ""

    # Only remove things that look like markup, not every "<...>" span.
    text = re.sub(
        r'<(?:!--.*?--|[/?!]?[A-Za-z][^<>]*)>',
        '',
        text,
        flags=re.DOTALL,
    )

    # Decode entities after tag removal so "&lt;b&gt;" stays visible as "<b>".
    text = html.unescape(text)

    ascii_punctuation = {
        0x00A0: ' ',    # non-breaking space
        0x2026: '...',  # ellipsis
        0x2013: '-',    # en dash
        0x2014: '-',    # em dash
        0x2018: "'",    # left single quote
        0x2019: "'",    # right single quote
        0x201C: '"',    # left double quote
        0x201D: '"',    # right double quote
    }
    return text.translate(ascii_punctuation)
def clean_markdown_for_pdf(text):
    """Flatten Markdown/HTML text into one line of plain text.

    The input is first passed through ``clean_html_tags``; Markdown syntax is
    then removed (headings, emphasis, fenced code) or simplified (list
    markers, inline code, links), and every run of whitespace - including
    newlines - is collapsed to a single space.

    Args:
        text: Value to clean.  Falsy values yield ``""``; non-strings are
            converted with ``str()`` first.

    Returns:
        The cleaned, stripped, single-line string.
    """
    if not text:
        return ""

    if not isinstance(text, str):
        text = str(text)

    text = clean_html_tags(text)

    # Heading markers.  The look-behind requires start of text or preceding
    # whitespace, so a '#' glued to a word ("C# is ...") is preserved.
    text = re.sub(r'(?<!\S)#{1,6}\s+', '', text)

    # List markers are rewritten before emphasis is stripped; otherwise the
    # bullet '*' of "* item with *emph*" pairs up with the emphasis marker.
    text = re.sub(r'^\s*[-*+]\s+', '• ', text, flags=re.MULTILINE)
    text = re.sub(r'^\s*(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)

    # Emphasis markers: keep the inner text only.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)  # **bold**
    text = re.sub(r'\*(.+?)\*', r'\1', text)      # *italic*
    text = re.sub(r'__(.+?)__', r'\1', text)      # __underline__
    text = re.sub(r'~~(.+?)~~', r'\1', text)      # ~~strikethrough~~

    # Fenced code blocks are dropped; inline code is shown in brackets.
    text = re.sub(r'```[^`]+```', '', text)
    text = re.sub(r'`([^`]+)`', r'[\1]', text)

    # Links keep their label and lose the target.
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)

    # Collapse all whitespace, newlines included.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
def clean_json_markers(text):
    """Remove Markdown code-fence markers (```json / ```) around JSON text.

    Args:
        text: Text that may be wrapped in a fenced code block.  Non-string
            values (dicts, ``None``, numbers) are returned unchanged so the
            function can be applied before the caller checks the type.

    Returns:
        The text without fence markers and without leading/trailing
        whitespace, or the original object when it is not a string.
    """
    if not isinstance(text, str):
        return text

    # Opening fence at the start of a line.
    text = re.sub(r'^```json\s*', '', text, flags=re.MULTILINE)
    # Closing fence at the end of a line.
    text = re.sub(r'\s*```$', '', text, flags=re.MULTILINE)
    # Any fence left elsewhere, together with the whitespace around it.
    text = re.sub(r'\s*```(json)?\s*', '', text)
    return text.strip()
def clean_json_string(text):
    """Normalise a JSON string that was embedded inside another string.

    Line breaks are removed, escaped quotes (``\\"``) become plain quotes,
    and one pair of enclosing double quotes is dropped when present.

    Args:
        text: The raw string.

    Returns:
        The normalised string.
    """
    # Drop every LF and CR character.
    flattened = text.translate({ord('\n'): None, ord('\r'): None})

    # Turn \" into ".
    unescaped = '"'.join(flattened.split('\\"'))

    # Peel off one layer of surrounding quotes.
    is_quoted = unescaped[:1] == '"' and unescaped[-1:] == '"'
    return unescaped[1:-1] if is_quoted else unescaped
def extract_and_format_json(data):
    """Render the known analysis fields of ``data`` as titled text blocks.

    Each field that is present with a truthy value contributes three lines:
    its English title, the value converted with ``str()``, and an empty line.
    Fields appear in a fixed order, independent of their order in ``data``.

    Args:
        data: Container of analysis fields keyed by field name.

    Returns:
        The lines joined with ``"\\n"``; ``""`` when no field qualifies.
    """
    titled_fields = (
        ("theoretical_contribution", "Theoretical contribution"),
        ("practical_applicability", "Practical applicability"),
        ("problem_solving", "Problem solving"),
        ("limitations", "Limitations"),
        ("future_research", "Future research"),
        ("methodology", "Methodology"),
    )

    lines = []
    for field, heading in titled_fields:
        if field not in data or not data[field]:
            continue
        # Title, value, then a blank separator line.
        lines.extend((f"{heading}", str(data[field]), ""))

    return "\n".join(lines)
def process_json_text(input_text):
    """Convert a JSON document containing an analysis into readable text.

    The analysis is looked up in this order:

    1. An ``"analysis"`` member: used directly when it is already an object,
       otherwise cleaned with ``clean_json_string`` and parsed as JSON.  If
       that parse fails the raw member value is used.
    2. The document itself, when it carries ``theoretical_contribution`` or
       ``practical_applicability`` at the top level.
    3. The first string member that mentions "theoretical", "practical" or
       "contribution" and parses as JSON after cleaning.

    Args:
        input_text: JSON text.

    Returns:
        The text produced by ``extract_and_format_json``, or a message
        starting with ``"No analysis data found"``, ``"JSON parsing error:"``
        or ``"Error:"``.  The function never raises for ordinary failures.
    """
    try:
        parsed = json.loads(input_text)
        analysis_data = None

        if "analysis" in parsed:
            raw_analysis = parsed["analysis"]
            if isinstance(raw_analysis, dict):
                # Already structured; no string round-trip needed.
                analysis_data = raw_analysis
            else:
                try:
                    analysis_data = json.loads(clean_json_string(str(raw_analysis)))
                except ValueError:
                    # json.JSONDecodeError is a ValueError: not JSON, so fall
                    # back to the raw member value.
                    analysis_data = raw_analysis

        elif any(key in parsed for key in ["theoretical_contribution", "practical_applicability"]):
            analysis_data = parsed

        else:
            hints = ("theoretical", "practical", "contribution")
            for value in parsed.values():
                if not isinstance(value, str):
                    continue
                lowered = value.lower()
                if not any(hint in lowered for hint in hints):
                    continue
                try:
                    analysis_data = json.loads(clean_json_string(value))
                    break
                except ValueError:
                    continue

        if analysis_data:
            return extract_and_format_json(analysis_data)
        return "No analysis data found in JSON"

    except json.JSONDecodeError as e:
        return f"JSON parsing error: {str(e)}"
    except Exception as e:
        # Boundary handler: callers get a message instead of an exception.
        return f"Error: {str(e)}"
def parse_transport_context(context_data):
    """Turn a stored transport-context value into a readable structure.

    Args:
        context_data: The stored value.
            * ``str``: code-fence markers are removed and the text is parsed
              as JSON.  If parsing fails, the text is cleaned with
              ``clean_markdown_for_pdf`` and wrapped as
              ``{"raw_analysis": ...}``.
            * ``dict``: a copy is returned in which every string value has
              been cleaned with ``clean_markdown_for_pdf``.
            * anything else is returned unchanged.

    Returns:
        The parsed JSON value, a cleaned dict, or the original object.
    """
    if isinstance(context_data, str):
        # Fence stripping only applies to text; applying it to a dict would
        # raise inside ``re.sub``.
        stripped = clean_json_markers(context_data)
        try:
            return json.loads(stripped)
        except json.JSONDecodeError:
            return {"raw_analysis": clean_markdown_for_pdf(stripped)}

    if isinstance(context_data, dict):
        return {
            key: clean_markdown_for_pdf(value) if isinstance(value, str) else value
            for key, value in context_data.items()
        }

    return context_data
def format_context_for_pdf(parsed_context):
    """Format a parsed transport context as labelled paragraphs.

    Args:
        parsed_context: Normally a dict.  Any other value is converted with
            ``str()`` and cleaned with ``clean_markdown_for_pdf``.

    Returns:
        For a dict, the recognised entries rendered as ``"LABEL: value"`` and
        joined by blank lines, in a fixed order.  Text fields are included
        only when truthy; ``relevance_score`` is included whenever the key is
        present, so a score of ``0`` is shown.  ``""`` when nothing matches.
    """
    if not isinstance(parsed_context, dict):
        return clean_markdown_for_pdf(str(parsed_context))

    leading_fields = (
        ('theoretical_contribution', 'TEOREETILINE PANUS'),
        ('practical_applicability', 'PRAKTILINE RAKENDATAVUS'),
        ('problem_solving', 'PROBLEEMILAHENDUS'),
        ('limitations', 'PIIRANGUD'),
    )
    # Both keys are rendered under the same heading.
    trailing_fields = (
        ('analysis', 'ANALÜÜS'),
        ('raw_analysis', 'ANALÜÜS'),
    )

    formatted = [
        f"{label}: {parsed_context[key]}"
        for key, label in leading_fields
        if parsed_context.get(key)
    ]

    if 'relevance_score' in parsed_context:
        formatted.append(f"RELEVANTSUSE SKOOR: {parsed_context['relevance_score']}/10")

    formatted.extend(
        f"{label}: {parsed_context[key]}"
        for key, label in trailing_fields
        if parsed_context.get(key)
    )

    return "\n\n".join(formatted)
def _article_property(props, key, default):
    """Return ``props[key]``, using ``default`` when it is missing or None."""
    value = props.get(key)
    return default if value is None else value


def _article_from_properties(props, clean_text, parse_context):
    """Build the article dict used by the PDF export from object properties.

    ``clean_text`` is applied to every free-text field and list item;
    ``parse_context`` is applied to the ``transport_context`` value.
    """
    def text(key, default=''):
        return clean_text(_article_property(props, key, default))

    def text_list(key):
        return [clean_text(item) for item in _article_property(props, key, [])]

    return {
        'article_id': _article_property(props, 'article_id', 'N/A'),
        'title': text('title', 'N/A'),
        'authors': _article_property(props, 'authors', []),
        'year': _article_property(props, 'year', 'N/A'),
        'journal': text('journal', 'N/A'),
        'doi': _article_property(props, 'doi', ''),
        'abstract_en': text('abstract_en'),
        'summary_et': text('summary_et'),
        'key_concepts': text_list('key_concepts'),
        'methods_used': text_list('methods_used'),
        'transport_context': parse_context(_article_property(props, 'transport_context', {})),
        'relevance_score': _article_property(props, 'relevance_score', 'N/A'),
        'processing_date': _article_property(props, 'processing_date', ''),
        'source_file': _article_property(props, 'source_file', ''),
    }


def get_all_articles_from_weaviate():
    """Fetch every object of the ``ScientificArticle`` collection.

    Each object's properties are converted to an article dict.  Text fields
    are cleaned with ``clean_markdown_for_pdf`` and the transport context is
    parsed with ``parse_transport_context``.  If that conversion raises, the
    article is still included, with the affected fields stringified via
    ``str()`` instead.

    A property that is absent or ``None`` is replaced by its default, so a
    ``None`` list no longer aborts the whole export.

    Returns:
        A list of article dicts.  Errors while talking to Weaviate are
        printed with a traceback, and whatever was collected so far is
        returned.  The client is always closed.
    """
    client = WeaviateClient()
    articles = []

    try:
        collection = client.client.collections.get("ScientificArticle")

        count_response = collection.aggregate.over_all(total_count=True)
        total = count_response.total_count

        print(f"Weaviate'is leidsin {total} artiklit")

        if total > 0:
            # NOTE(review): everything is requested in one call; confirm the
            # server accepts a limit this large for big collections.
            response = collection.query.fetch_objects(limit=total)

            for obj in response.objects:
                props = obj.properties
                try:
                    article = _article_from_properties(
                        props, clean_markdown_for_pdf, parse_transport_context
                    )
                except Exception as e:
                    print(f" Viga artikli {props.get('article_id', 'unknown')} töötlemisel: {e}")
                    # Keep the article, but without any cleaning.
                    article = _article_from_properties(props, str, str)
                articles.append(article)

    except Exception as e:
        print(f"Viga artiklite toomisel: {e}")
        import traceback
        traceback.print_exc()
    finally:
        client.close()

    return articles
def format_summary_for_pdf(summary):
    """Prepare an article summary for the PDF.

    The summary is cleaned with ``clean_markdown_for_pdf``, line breaks are
    inserted around the five known section headings, runs of three or more
    newlines are reduced to two, and the result is cut to 4000 characters
    (followed by a truncation notice) when it is longer.

    Args:
        summary: Summary text.  Falsy values yield ``""``; non-strings are
            converted with ``str()``.

    Returns:
        The formatted summary text.
    """
    if not summary:
        return ""

    text = summary if isinstance(summary, str) else str(summary)
    text = clean_markdown_for_pdf(text)

    # (heading, line breaks placed before it); one newline always follows.
    section_breaks = (
        ('1. ARTIKLI PEAMISED PUNKTID:', '\n'),
        ('2. KASUTATUD MEETODID:', '\n\n'),
        ('3. PEAMISED TULEMUSED:', '\n\n'),
        ('4. JÄRELDUSED JA SOOVITUSED:', '\n\n'),
        ('5. TRANSFORDIPLANEERIMISE KONTEKST:', '\n\n'),
    )
    for heading, lead in section_breaks:
        text = text.replace(heading, lead + heading + '\n')

    text = re.sub(r'\n{3,}', '\n\n', text)

    max_length = 4000
    if len(text) > max_length:
        text = text[:max_length] + "... [kokkuvõte lõigatud, liiga pikk]"

    return text
def _markup_safe(value):
    """Escape ``&``, ``<`` and ``>`` so Paragraph shows ``value`` literally."""
    from xml.sax.saxutils import escape  # local import: module imports untouched

    return escape(str(value))


def _pdf_styles():
    """Return the paragraph styles of the export, keyed by role."""
    base = getSampleStyleSheet()
    return {
        'title': ParagraphStyle(
            'CustomTitle',
            parent=base['Heading1'],
            fontSize=14,
            spaceAfter=12,
            textColor=colors.HexColor('#2c3e50'),
            alignment=TA_LEFT,
        ),
        'subtitle': ParagraphStyle(
            'CustomSubtitle',
            parent=base['Heading2'],
            fontSize=12,
            spaceAfter=6,
            textColor=colors.HexColor('#34495e'),
            alignment=TA_LEFT,
        ),
        'section': ParagraphStyle(
            'CustomSection',
            parent=base['Heading3'],
            fontSize=11,
            spaceAfter=6,
            spaceBefore=12,
            textColor=colors.HexColor('#7f8c8d'),
            alignment=TA_LEFT,
        ),
        'normal': ParagraphStyle(
            'CustomNormal',
            parent=base['Normal'],
            fontSize=10,
            spaceAfter=6,
            alignment=TA_JUSTIFY,
            leading=14,  # line spacing
        ),
        'metadata': ParagraphStyle(
            'CustomMetadata',
            parent=base['Normal'],
            fontSize=9,
            spaceAfter=3,
            textColor=colors.HexColor('#5d6d7e'),
            alignment=TA_LEFT,
        ),
    }


def _label_value_table(rows, padding, label_color=None):
    """Build the two-column (label, value) table used for article details."""
    commands = [
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('BOTTOMPADDING', (0, 0), (-1, -1), padding),
        ('TOPPADDING', (0, 0), (-1, -1), padding),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('LEFTPADDING', (0, 0), (0, -1), 0),
    ]
    if label_color is not None:
        commands.append(('TEXTCOLOR', (0, 0), (0, -1), label_color))

    table = Table(rows, colWidths=[2 * cm, 12 * cm])
    table.setStyle(TableStyle(commands))
    return table


def _format_processing_date(value):
    """Format a processing date as ``dd.mm.YYYY HH:MM``, or fall back to text."""
    if isinstance(value, datetime):
        return value.strftime('%d.%m.%Y %H:%M')

    text = str(value)
    try:
        # Drop fractional seconds (and whatever follows them), then make a
        # trailing 'Z' acceptable to fromisoformat.
        trimmed = text.split('.')[0] if '.' in text else text
        trimmed = trimmed.replace('Z', '+00:00')
        return datetime.fromisoformat(trimmed).strftime('%d.%m.%Y %H:%M')
    except ValueError:
        # Unparseable: show the leading part of the original text.
        return text[:19]


def _analysis_fields(analysis):
    """Extract the known analysis fields from a dict, JSON text or JSON-like text.

    Returns a dict mapping field name to display string; empty when nothing
    could be extracted.
    """
    text_keys = (
        'theoretical_contribution',
        'practical_applicability',
        'problem_solving',
        'limitations',
    )

    if isinstance(analysis, str):
        try:
            decoded = json.loads(clean_json_markers(analysis))
        except ValueError:
            decoded = None
        if isinstance(decoded, dict):
            analysis = decoded

    if isinstance(analysis, dict):
        return {
            key: str(analysis[key])
            for key in text_keys + ('relevance_score',)
            if analysis.get(key) not in (None, '')
        }

    if not isinstance(analysis, str):
        return {}

    # Text that is not valid JSON: pick the members out with regexes.
    found = {}
    for key in text_keys:
        match = re.search('"' + key + r'":\s*"([^"]*(?:\\"[^"]*)*)"', analysis)
        if match and match.group(1):
            found[key] = match.group(1)
    match = re.search(r'"relevance_score":\s*(\d+(?:\.\d+)?)', analysis)
    if match:
        found['relevance_score'] = match.group(1)
    return found


def _transport_context_flowables(context, section_style, normal_style):
    """Build the flowables of the transport-planning context section."""
    # Normalise to a dict: the context may also be a plain string.
    if isinstance(context, str):
        try:
            decoded = json.loads(clean_json_markers(context))
        except ValueError:
            decoded = None
        context = decoded if isinstance(decoded, dict) else {'raw_analysis': context}
    elif not isinstance(context, dict):
        context = {'raw_analysis': str(context)}

    if not context:
        return []

    flowables = [Paragraph("<b>TRANSPORDIPLANEERIMISE KONTEKST:</b>", section_style)]

    score = context.get('relevance_score')
    if score is not None:
        score_text = format_context_for_pdf(score)
        if score_text:
            flowables.append(
                Paragraph("RELEVANTSUSE SKOOR: " + _markup_safe(score_text), normal_style)
            )
    flowables.append(Spacer(1, 1))

    flowables.append(Paragraph("<b>ANALÜÜS:</b>", normal_style))

    analysis = context.get('analysis')
    if analysis in (None, ''):
        analysis = context.get('raw_analysis')

    if analysis in (None, ''):
        # Fields stored directly on the context; the score was shown above.
        fields = _analysis_fields(context)
        fields.pop('relevance_score', None)
    else:
        fields = _analysis_fields(analysis)

    labelled = (
        ('theoretical_contribution', 'TEOREETILINE PANUS'),
        ('practical_applicability', 'PRAKTILINE RAKENDATAVUS'),
        ('problem_solving', 'PROBLEEMILAHENDUS'),
        ('limitations', 'PIIRANGUD'),
    )
    for key, label in labelled:
        if key in fields:
            flowables.append(Paragraph(f"<b>{label}:</b>", normal_style))
            flowables.append(Paragraph(_markup_safe(fields[key]), normal_style))
    if 'relevance_score' in fields:
        flowables.append(
            Paragraph(
                "<b>RELEVANTSUSE SKOOR:</b> " + _markup_safe(fields['relevance_score']),
                normal_style,
            )
        )

    if not fields and analysis not in (None, ''):
        # Nothing structured was found: show the analysis text itself.
        raw_text = clean_markdown_for_pdf(analysis)
        if raw_text:
            flowables.append(Paragraph(_markup_safe(raw_text), normal_style))

    return flowables


def _article_flowables(number, article, styles):
    """Build the flowables describing one article (without page break)."""
    flowables = [
        Paragraph(f"{number}. {_markup_safe(article.get('title', 'N/A'))}", styles['title'])
    ]

    authors = article.get('authors')
    if authors:
        authors_text = ", ".join(str(author) for author in authors)
        flowables.append(
            Paragraph(f"<b>Autorid:</b> {_markup_safe(authors_text)}", styles['subtitle'])
        )

    # Bibliographic details.  Table cells are given as plain strings.
    metadata_rows = []
    year = article.get('year')
    if year and year != 'N/A':
        metadata_rows.append(['Aasta:', str(year)])
    journal = article.get('journal')
    if journal and journal != 'N/A':
        metadata_rows.append(['Žurnaal:', str(journal)])
    doi = article.get('doi')
    if doi:
        metadata_rows.append(['DOI:', str(doi)])
    relevance = article.get('relevance_score')
    if relevance and relevance != 'N/A':
        metadata_rows.append(['Relevantsus:', f"{relevance}/10"])
    if metadata_rows:
        flowables.append(_label_value_table(metadata_rows, padding=6))
        flowables.append(Spacer(1, 12))

    # Key concepts (first ten) and methods.
    tag_rows = []
    key_concepts = article.get('key_concepts')
    if key_concepts:
        tag_rows.append(['Võtmesõnad:', ", ".join(str(c) for c in key_concepts[:10])])
    methods_used = article.get('methods_used')
    if methods_used:
        tag_rows.append(['Meetodid:', ", ".join(str(m) for m in methods_used)])
    if tag_rows:
        flowables.append(
            _label_value_table(tag_rows, padding=4, label_color=colors.HexColor('#5d6d7e'))
        )
        flowables.append(Spacer(1, 12))

    abstract_text = article.get('abstract_en')
    if abstract_text:
        flowables.append(Paragraph("<b>ABSTRAKT (inglise keeles):</b>", styles['section']))
        abstract_text = str(abstract_text)
        # Truncate before escaping so an entity is never cut in half.
        if len(abstract_text) > 800:
            abstract_text = abstract_text[:800] + "..."
        flowables.append(Paragraph(_markup_safe(abstract_text), styles['normal']))
        flowables.append(Spacer(1, 12))

    if article.get('summary_et'):
        flowables.append(Paragraph("<b>KOKKUVÕTE (eesti keeles):</b>", styles['section']))
        summary = format_summary_for_pdf(article['summary_et'])
        # Convert the inserted line breaks into explicit <br/> tags.
        summary_markup = _markup_safe(summary).replace('\n', '<br/>')
        flowables.append(Paragraph(summary_markup, styles['normal']))
        flowables.append(Spacer(1, 12))

    context = article.get('transport_context')
    if context:
        flowables.extend(
            _transport_context_flowables(context, styles['section'], styles['normal'])
        )

    # Source file and processing information.
    footer_info = []
    if article.get('source_file'):
        footer_info.append(f"Allikfail: {os.path.basename(str(article['source_file']))}")
    if article.get('processing_date'):
        footer_info.append(f"Töödeldud: {_format_processing_date(article['processing_date'])}")
    if footer_info:
        flowables.append(Spacer(1, 6))
        flowables.append(Paragraph(_markup_safe(" | ".join(footer_info)), styles['metadata']))

    return flowables


def create_pdf_from_articles(articles, output_filename):
    """Write an A4 PDF report describing ``articles``.

    The document starts with a header (export time and article count), then
    shows each article with a page break between articles, and ends with a
    short export summary.

    All article text is escaped before it is handed to ``Paragraph``, because
    ``Paragraph`` interprets inline tags such as ``<b>``.  The transport
    context may be a dict, a JSON string or any other value; missing keys are
    skipped instead of raising.

    Args:
        articles: List of article dicts as produced by
            ``get_all_articles_from_weaviate``.
        output_filename: Path of the PDF file to create.

    Returns:
        The number of articles written.
    """
    doc = SimpleDocTemplate(
        output_filename,
        pagesize=A4,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72,
    )

    styles = _pdf_styles()
    metadata_style = styles['metadata']

    # Document header.
    today = datetime.now().strftime("%d.%m.%Y %H:%M")
    elements = [
        Paragraph("TEADUSARTIKLITE ANDMEBAAS", styles['title']),
        Spacer(1, 12),
        Paragraph(f"Eksporditud: {today}", metadata_style),
        Paragraph(f"Artikleid kokku: {len(articles)}", metadata_style),
        Spacer(1, 24),
    ]

    last_index = len(articles) - 1
    for i, article in enumerate(articles):
        elements.extend(_article_flowables(i + 1, article, styles))
        # Page break between articles; extra spacing after the last one.
        if i < last_index:
            elements.append(PageBreak())
        else:
            elements.append(Spacer(1, 24))

    # Closing summary.
    elements.append(Paragraph("=" * 80, metadata_style))
    elements.append(Spacer(1, 6))
    elements.append(Paragraph(f"Kokku eksporditud artikleid: {len(articles)}", metadata_style))
    elements.append(Paragraph("Eksporditud Weaviate teadusartiklite andmebaasist", metadata_style))
    elements.append(
        Paragraph(
            f"PDF genereeritud: {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}",
            metadata_style,
        )
    )

    doc.build(elements)

    return len(articles)
def main():
    """Export all articles from Weaviate into a timestamped PDF file.

    Progress is reported on stdout.  The PDF is written to
    ``./data/exports`` (created when missing).  Any error raised while the
    PDF is created or reported on is printed together with its traceback.
    """
    rule = "=" * 60
    print(rule)
    print("ARTIKLITE EKSPORT PDF FAILI")
    print(rule)

    # Load the articles; stop early when there is nothing to export.
    print("Toon artikleid Weaviate'ist...")
    articles = get_all_articles_from_weaviate()
    if not articles:
        print("Ei leidnud ühtegi artiklit Weaviate'is!")
        return

    print(f"Leidsin {len(articles)} artiklit")

    # Timestamped output path.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = "./data/exports"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = os.path.join(output_dir, f"artiklid_eksport_{timestamp}.pdf")

    print(f"Loon PDF faili: {output_filename}")
    try:
        article_count = create_pdf_from_articles(articles, output_filename)

        print(rule)
        print(f"✅ VALMIS! Loodud PDF fail: {output_filename}")
        print(f" - Eksporditud artikleid: {article_count}")
        print(f" - Faili suurus: {os.path.getsize(output_filename) / 1024:.1f} KB")
        print(rule)

        # Preview the titles of up to three articles.
        print("\nEsimesed artiklid:")
        for position, article in enumerate(articles[:3], start=1):
            title_preview = article['title']
            if len(title_preview) > 60:
                title_preview = title_preview[:60] + "..."
            print(f" {position}. {title_preview}")

    except Exception as e:
        print(f"\n❌ VIGA PDF loomisel: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
|