| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703 |
- import os
- import sys
- import re
- from datetime import datetime
- from reportlab.lib.pagesizes import A4
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
- from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT
- from reportlab.lib import colors
- from reportlab.lib.units import cm
- import json
- import traceback
- # Lisa src kaust Pythoni teele
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
- from src.weaviate_client import WeaviateClient
- # ============================================================================
- # STRINGI PUHASTAMISE FUNKTSIOONID
- # ============================================================================
# Typographic characters mapped to plain ASCII equivalents. Applied in a
# single pass with str.translate instead of one str.replace call per entry.
_PDF_CHAR_TABLE = str.maketrans({
    '\u00a0': ' ',    # non-breaking space
    '\u2026': '...',  # horizontal ellipsis
    '\u2013': '-',    # en dash
    '\u2014': '-',    # em dash
    '\u2018': "'",    # left single quotation mark
    '\u2019': "'",    # right single quotation mark
    '\u201c': '"',    # left double quotation mark
    '\u201d': '"',    # right double quotation mark
})


def clean_html_tags(text):
    """Strip HTML/XML tags and normalise typographic characters.

    Args:
        text: Input string. Any falsy value (``None``, ``""``) is accepted.

    Returns:
        The text with every ``<...>`` span removed and non-breaking spaces,
        ellipses, dashes and curly quotes replaced by ASCII equivalents.
        Returns ``""`` for falsy input.

    Note:
        Character references such as ``&amp;`` are left untouched; only
        tags and the characters in ``_PDF_CHAR_TABLE`` are rewritten.
    """
    if not text:
        return ""
    # Remove anything that looks like a tag: '<', one or more non-'>', '>'.
    without_tags = re.sub(r'<[^>]+>', '', text)
    return without_tags.translate(_PDF_CHAR_TABLE)
def clean_markdown_for_pdf(text):
    """Flatten Markdown-flavoured text into a single line of plain text.

    Args:
        text: Value to clean. Non-string values are converted with ``str``;
            falsy values yield ``""``.

    Returns:
        The text with HTML tags, Markdown headings, emphasis markers, fenced
        code blocks and link targets removed, list markers normalised, and
        every run of whitespace (including newlines) collapsed to one space.
    """
    if not text:
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = clean_html_tags(text)

    # Heading markers: only a '#' run that starts a token (start of text or
    # preceded by whitespace). Without the lookbehind, "C# code" would lose
    # both the '#' and the following space.
    text = re.sub(r'(?<!\S)#{1,6}\s+', '', text)

    # List markers are handled before emphasis so that a leading '* ' bullet
    # cannot be paired with a later '*' and consumed as italics.
    text = re.sub(r'^\s*[-*+]\s+', '• ', text, flags=re.MULTILINE)
    text = re.sub(r'^\s*(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)

    # Emphasis markers: keep the inner text only.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)  # **bold**
    text = re.sub(r'\*(.+?)\*', r'\1', text)      # *italic*
    text = re.sub(r'__(.+?)__', r'\1', text)      # __underline__
    text = re.sub(r'~~(.+?)~~', r'\1', text)      # ~~strikethrough~~

    # Fenced code blocks are dropped; inline code is shown in brackets.
    text = re.sub(r'```[^`]+```', '', text)
    text = re.sub(r'`([^`]+)`', r'[\1]', text)

    # Links: keep the label, drop the target.
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)

    # Collapse all whitespace, newlines included, to single spaces.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
# Section headings expected in a summary, each paired with the newlines to
# insert before it. The fifth heading is listed in both the historical
# misspelling ("TRANSFORDI...") and the correct spelling so that either
# variant gets its line break.
_SUMMARY_SECTION_BREAKS = (
    ('1. ARTIKLI PEAMISED PUNKTID:', '\n'),
    ('2. KASUTATUD MEETODID:', '\n\n'),
    ('3. PEAMISED TULEMUSED:', '\n\n'),
    ('4. JÄRELDUSED JA SOOVITUSED:', '\n\n'),
    ('5. TRANSFORDIPLANEERIMISE KONTEKST:', '\n\n'),
    ('5. TRANSPORDIPLANEERIMISE KONTEKST:', '\n\n'),
)

# Summaries longer than this many characters are truncated.
_SUMMARY_MAX_CHARS = 4000


def format_summary_for_pdf(summary):
    """Prepare an article summary for the PDF.

    Args:
        summary: Summary text. Non-string values are converted with ``str``;
            falsy values yield ``""``.

    Returns:
        The summary with Markdown removed (via ``clean_markdown_for_pdf``),
        a line break inserted around each known section heading, runs of
        three or more newlines reduced to two, and the text truncated to
        4000 characters followed by a truncation notice when it is longer.
    """
    if not summary:
        return ""
    if not isinstance(summary, str):
        summary = str(summary)

    # Cleaning collapses all whitespace, so section breaks are re-inserted
    # explicitly afterwards.
    summary = clean_markdown_for_pdf(summary)
    for heading, leading_break in _SUMMARY_SECTION_BREAKS:
        summary = summary.replace(heading, f"{leading_break}{heading}\n")

    summary = re.sub(r'\n{3,}', '\n\n', summary)

    if len(summary) > _SUMMARY_MAX_CHARS:
        summary = summary[:_SUMMARY_MAX_CHARS] + "... [kokkuvõte lõigatud, liiga pikk]"
    return summary
- # ============================================================================
- # TRANSPORT KONTEKSTI PARSING
- # ============================================================================
def extract_json_field(json_string, field_name):
    """Extract one string-valued field from JSON text with a regex.

    Intended for text that may not be valid JSON as a whole, where
    ``json.loads`` cannot be used.

    Args:
        json_string: JSON-like text to search. Falsy values yield ``None``.
        field_name: Name of the field (e.g. ``"theoretical_contribution"``).
            It is matched literally; regex metacharacters are escaped.

    Returns:
        The raw text between the value's quotes, with backslash escape
        sequences left undecoded, or ``None`` if the field is absent or its
        value is not a string.
    """
    if not json_string:
        return None
    # Value body: any char except quote/backslash, or a backslash followed
    # by any char. This keeps an escaped quote inside the value and ends
    # correctly after an escaped backslash ("...\\").
    pattern = r'"' + re.escape(field_name) + r'"\s*:\s*"((?:[^"\\]|\\.)*)"'
    match = re.search(pattern, json_string, flags=re.DOTALL)
    if match:
        return match.group(1)
    return None
def extract_relevance_score(json_string):
    """Find the numeric ``relevance_score`` value in JSON-like text.

    Args:
        json_string: Text to search. Falsy values yield ``None``.

    Returns:
        The score as the matched digit string (integer or decimal form),
        or ``None`` when no unquoted numeric ``"relevance_score"`` is found.
    """
    if json_string:
        found = re.search(r'"relevance_score":\s*(\d+(?:\.\d+)?)', json_string)
        if found is not None:
            return found.group(1)
    return None
# (JSON key, PDF section heading) pairs, in output order.
_TRANSPORT_TEXT_FIELDS = (
    ("theoretical_contribution", "TEOREETILINE PANUS"),
    ("practical_applicability", "PRAKTILINE RAKENDATAVUS"),
    ("problem_solving", "PROBLEEMILAHENDUS"),
    ("limitations", "PIIRANGUD"),
)

# Upper bound on how many nested "analysis" wrappers are followed.
_TRANSPORT_MAX_DEPTH = 8


def _strip_json_fences(text):
    """Remove Markdown ```json ... ``` fence markers and outer whitespace."""
    text = re.sub(r'```json\s*', '', text)
    text = re.sub(r'\s*```', '', text)
    return text.strip()


def _parse_json_leniently(text):
    """Parse text as JSON, retrying on its outermost ``{...}`` span.

    Returns the decoded value, or ``None`` when neither attempt succeeds.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    candidate = re.search(r'\{.*\}', text, flags=re.DOTALL)
    if candidate is None:
        return None
    try:
        return json.loads(candidate.group(0))
    except json.JSONDecodeError as err:
        print(f"❌ Candidate parse also failed: {err}")
        return None


def _transport_sections_from_dict(parsed):
    """Build the formatted sections from an already-decoded analysis dict."""
    sections = []
    for key, heading in _TRANSPORT_TEXT_FIELDS:
        value = parsed.get(key)
        if value:
            sections.append(f"{heading}:\n{value}")
    score = parsed.get("relevance_score")
    if score is not None:
        sections.append(f"RELEVANTSUSE SKOOR: {score}/10")
    return sections


def _transport_sections_from_text(text):
    """Build the formatted sections from malformed JSON text using regexes.

    A value ends at the first quote followed by the next key (``, "``) or
    by the closing brace, so unescaped quotes inside a value are kept.
    """
    print("⚠️ Using regex fallback")
    sections = []
    for key, heading in _TRANSPORT_TEXT_FIELDS:
        pattern = r'"' + key + r'"\s*:\s*"(.*?)"(?=\s*,\s*"|\s*,?\s*\})'
        found = re.search(pattern, text, flags=re.DOTALL)
        if found is None:
            continue
        # The text was never JSON-decoded, so undo the common escapes here.
        content = found.group(1).replace('\\n', '\n').replace('\\"', '"')
        if content:
            sections.append(f"{heading}:\n{content}")
    found = re.search(r'"relevance_score"\s*:\s*(\d+(?:\.\d+)?)', text)
    if found is not None:
        sections.append(f"RELEVANTSUSE SKOOR: {found.group(1)}/10")
    return sections


def _collect_transport_sections(payload, depth=0):
    """Return the formatted sections for a dict or JSON-like string.

    An ``"analysis"`` entry (dict or string) is followed first; if it yields
    nothing, the enclosing dict's own fields are used.
    """
    if not payload or depth > _TRANSPORT_MAX_DEPTH:
        return []
    if isinstance(payload, dict):
        nested = payload.get('analysis')
        if isinstance(nested, (dict, str)):
            sections = _collect_transport_sections(nested, depth + 1)
            if sections:
                return sections
        return _transport_sections_from_dict(payload)
    text = _strip_json_fences(str(payload))
    parsed = _parse_json_leniently(text)
    if isinstance(parsed, dict):
        return _collect_transport_sections(parsed, depth + 1)
    return _transport_sections_from_text(text)


def format_transport_context(transport_context):
    """Format the transport-planning analysis of an article for the PDF.

    Accepted inputs:
      * a dict holding the analysis fields directly;
      * a dict with an ``"analysis"`` entry (dict or JSON text);
      * JSON text, optionally wrapped in a ```json fence and optionally
        nesting further ``"analysis"`` strings.

    Valid JSON is always decoded as-is. Text that cannot be decoded is
    searched with regular expressions instead.

    Args:
        transport_context: The stored analysis (dict or str).

    Returns:
        The sections found, each as ``"<HEADING>:\\n<text>"``, joined by
        blank lines and followed by ``"RELEVANTSUSE SKOOR: <n>/10"`` when a
        score is present. ``None`` if the input is empty, of another type,
        or contains none of the known fields.
    """
    if not transport_context or not isinstance(transport_context, (dict, str)):
        return None
    sections = _collect_transport_sections(transport_context)
    return "\n\n".join(sections) if sections else None
- # ============================================================================
- # WEAVIATE ANDMEBAASIST PÄRING
- # ============================================================================
def _article_from_properties(props):
    """Map one Weaviate object's properties to the dict used by the export."""
    return {
        'article_id': props.get('article_id', 'N/A'),
        'title': clean_markdown_for_pdf(props.get('title', 'N/A')),
        # "or []": a property stored as null comes back as None, not as the
        # .get() default, and must not break the comprehensions below.
        'authors': props.get('authors') or [],
        'year': props.get('year', 'N/A'),
        'journal': clean_markdown_for_pdf(props.get('journal', 'N/A')),
        'doi': props.get('doi', ''),
        'abstract_en': clean_markdown_for_pdf(props.get('abstract_en', '')),
        'summary_et': clean_markdown_for_pdf(props.get('summary_et', '')),
        'key_concepts': [clean_markdown_for_pdf(c) for c in props.get('key_concepts') or []],
        'methods_used': [clean_markdown_for_pdf(m) for m in props.get('methods_used') or []],
        'transport_context': props.get('transport_context', {}),
        'relevance_score': props.get('relevance_score', 'N/A'),
        'processing_date': props.get('processing_date', ''),
        'source_file': props.get('source_file', ''),
    }


def get_all_articles_from_weaviate():
    """Fetch every article from the ``ScientificArticle`` collection.

    Text fields are cleaned with ``clean_markdown_for_pdf``. An article whose
    conversion fails is reported and skipped; a failure of the query itself
    is reported with a traceback. The client is closed in every case.

    Returns:
        A list of article dicts (empty when nothing could be fetched).
    """
    client = WeaviateClient()
    articles = []
    try:
        collection = client.client.collections.get("ScientificArticle")
        total = collection.aggregate.over_all(total_count=True).total_count
        print(f"Weaviate'is leidsin {total} artiklit")
        if total > 0:
            # NOTE(review): a single fetch with limit=total presumably hits a
            # server-side result cap on large collections - confirm.
            response = collection.query.fetch_objects(limit=total)
            for obj in response.objects:
                try:
                    articles.append(_article_from_properties(obj.properties))
                except Exception as e:
                    article_id = (obj.properties or {}).get('article_id', 'unknown')
                    print(f"⚠️ Viga artikli {article_id} töötlemisel: {e}")
                    # Carry on with the next article.
                    continue
    except Exception as e:
        print(f"❌ Viga artiklite toomisel: {e}")
        traceback.print_exc()
    finally:
        client.close()
    return articles
- # ============================================================================
- # PDF GENEREERIMINE
- # ============================================================================
def _format_processing_date(value):
    """Render a processing timestamp as ``dd.mm.YYYY HH:MM``.

    Accepts a ``datetime`` or an ISO-8601 string. Text that cannot be parsed
    is returned cut to its first 19 characters. Never raises for these inputs.
    """
    if isinstance(value, datetime):
        return value.strftime('%d.%m.%Y %H:%M')
    raw = str(value)
    # Drop fractional seconds and spell a trailing 'Z' as an explicit offset
    # so that datetime.fromisoformat accepts the text.
    iso_text = raw.split('.')[0].replace('Z', '+00:00')
    try:
        return datetime.fromisoformat(iso_text).strftime('%d.%m.%Y %H:%M')
    except ValueError:
        return raw[:19]


def create_pdf_from_articles(articles, output_filename):
    """Write all articles to a PDF file, one article per page.

    Args:
        articles: List of article dicts as produced by
            ``get_all_articles_from_weaviate``.
        output_filename: Path of the PDF file to create.

    Returns:
        The number of articles written.
    """
    # Local import: only needed for the plain-text fallback below.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(
        output_filename,
        pagesize=A4,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Paragraph styles (built once, shared by every article).
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=14,
        spaceAfter=12,
        textColor=colors.HexColor('#2c3e50'),
        alignment=TA_LEFT
    )
    subtitle_style = ParagraphStyle(
        'CustomSubtitle',
        parent=styles['Heading2'],
        fontSize=12,
        spaceAfter=6,
        textColor=colors.HexColor('#34495e'),
        alignment=TA_LEFT
    )
    section_style = ParagraphStyle(
        'CustomSection',
        parent=styles['Heading3'],
        fontSize=11,
        spaceAfter=6,
        spaceBefore=12,
        textColor=colors.HexColor('#7f8c8d'),
        alignment=TA_LEFT
    )
    normal_style = ParagraphStyle(
        'CustomNormal',
        parent=styles['Normal'],
        fontSize=10,
        spaceAfter=6,
        alignment=TA_JUSTIFY,
        leading=14
    )
    metadata_style = ParagraphStyle(
        'CustomMetadata',
        parent=styles['Normal'],
        fontSize=9,
        spaceAfter=3,
        textColor=colors.HexColor('#5d6d7e'),
        alignment=TA_LEFT
    )
    content_style = ParagraphStyle(
        'TagContent',
        parent=styles['Normal'],
        fontSize=9,
        spaceAfter=4,
    )
    metadata_table_commands = [
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('LEFTPADDING', (0, 0), (0, -1), 0),
    ]

    elements = []

    # Document header.
    elements.append(Paragraph("TEADUSARTIKLITE ANDMEBAAS", title_style))
    elements.append(Spacer(1, 12))
    today = datetime.now().strftime("%d.%m.%Y %H:%M")
    elements.append(Paragraph(f"Eksporditud: {today}", metadata_style))
    elements.append(Paragraph(f"Artikleid kokku: {len(articles)}", metadata_style))
    elements.append(Spacer(1, 24))

    for i, article in enumerate(articles):
        # Article title.
        elements.append(Paragraph(f"{i+1}. {article['title']}", title_style))
        print(f"✅ {i+1}. {article['title']}")

        # Authors.
        authors = article['authors']
        if authors:
            if isinstance(authors, str):
                authors_text = authors
            else:
                authors_text = ", ".join(str(author) for author in authors)
            elements.append(Paragraph(f"Autorid: {authors_text}", subtitle_style))

        # Metadata table (year, journal, DOI, relevance).
        metadata_data = []
        if article['year'] and article['year'] != 'N/A':
            metadata_data.append(['Aasta:', str(article['year'])])
        if article['journal'] and article['journal'] != 'N/A':
            metadata_data.append(['Žurnaal:', article['journal']])
        if article['doi']:
            metadata_data.append(['DOI:', article['doi']])
        score = article['relevance_score']
        # Explicit comparison so that a legitimate score of 0 is still shown.
        if score not in (None, '', 'N/A'):
            metadata_data.append(['Relevantsus:', f"{score}/10"])
        if metadata_data:
            metadata_table = Table(metadata_data, colWidths=[2*cm, 12*cm])
            metadata_table.setStyle(TableStyle(metadata_table_commands))
            elements.append(metadata_table)
            elements.append(Spacer(1, 12))

        # Key concepts and methods.
        if article['key_concepts'] or article['methods_used']:
            if article['key_concepts']:
                concepts_text = ", ".join(article['key_concepts'][:10])
                elements.append(Paragraph("<b>Võtmesõnad:</b> " + concepts_text, content_style))
            if article['methods_used']:
                methods_text = ", ".join(article['methods_used'][:8])
                elements.append(Paragraph("<b>Meetodid:</b> " + methods_text, content_style))
            elements.append(Spacer(1, 6))

        # Abstract (English), cut to 800 characters.
        if article['abstract_en']:
            elements.append(Paragraph("ABSTRAKT (inglise keeles):", section_style))
            abstract_text = article['abstract_en']
            if len(abstract_text) > 800:
                abstract_text = abstract_text[:800] + "..."
            elements.append(Paragraph(abstract_text, normal_style))
            elements.append(Spacer(1, 12))

        # Summary (Estonian).
        if article['summary_et']:
            elements.append(Paragraph("KOKKUVÕTE (eesti keeles):", section_style))
            summary = format_summary_for_pdf(article['summary_et'])
            # Paragraph treats '\n' as ordinary whitespace; the section breaks
            # inserted by format_summary_for_pdf only show up as <br/>.
            summary_markup = summary.strip().replace('\n', '<br/>')
            elements.append(Paragraph(summary_markup, normal_style))
            elements.append(Spacer(1, 12))

        # Transport-planning context.
        if article['transport_context']:
            elements.append(Paragraph("TRANSPORDIPLANEERIMISE KONTEKST:", section_style))
            context_text = format_transport_context(article['transport_context'])
            if context_text:
                # One paragraph per blank-line-separated part.
                for part in context_text.split('\n\n'):
                    if not part.strip():
                        continue
                    try:
                        paragraph = Paragraph(part.replace('\n', '<br/>'), normal_style)
                    except Exception as e:
                        print(f"❌ Failed to add part to PDF: {e}")
                        # The text could not be read as paragraph markup, so
                        # escape it and render it literally; this retry
                        # cannot fail on the same markup again.
                        paragraph = Paragraph(escape(part.replace('\n', ' ')), normal_style)
                    elements.append(paragraph)
                    elements.append(Spacer(1, 6))
                print("✅ Context added to PDF successfully")
            else:
                elements.append(Paragraph("Analüüsi andmed puuduvad", normal_style))
            elements.append(Spacer(1, 12))

        # Footer line (source file, processing date).
        footer_info = []
        if article['source_file']:
            source_name = os.path.basename(article['source_file'])
            footer_info.append(f"Allikfail: {source_name}")
        if article['processing_date']:
            footer_info.append(f"Töödeldud: {_format_processing_date(article['processing_date'])}")
        if footer_info:
            elements.append(Spacer(1, 6))
            elements.append(Paragraph(" | ".join(footer_info), metadata_style))

        # Page break between articles; extra space after the last one.
        if i < len(articles) - 1:
            elements.append(PageBreak())
        else:
            elements.append(Spacer(1, 24))

    # Closing block.
    elements.append(Paragraph("=" * 80, metadata_style))
    elements.append(Spacer(1, 6))
    elements.append(Paragraph(f"Kokku eksporditud artikleid: {len(articles)}", metadata_style))
    elements.append(Paragraph("Eksporditud Weaviate teadusartiklite andmebaasist", metadata_style))
    elements.append(Paragraph(f"PDF genereeritud: {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}", metadata_style))

    doc.build(elements)
    return len(articles)
- # ============================================================================
- # PEAMINE FUNKTSIOON
- # ============================================================================
def main():
    """Run the export: fetch the articles, then write them to a PDF.

    The file is created under ``./data/exports`` with a timestamped name.
    Progress and the outcome are printed; an error while building the PDF
    is printed together with its traceback.
    """
    banner = "=" * 60
    print(banner)
    print("ARTIKLITE EKSPORT PDF FAILI")
    print(banner)

    # Fetch the articles; nothing to export if none came back.
    print("Toon artikleid Weaviate'ist...")
    articles = get_all_articles_from_weaviate()
    if not articles:
        print("❌ Ei leidnud ühtegi artiklit Weaviate'is!")
        return
    print(f"✓ Leidsin {len(articles)} artiklit")

    # Timestamped output path.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = "./data/exports"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = os.path.join(output_dir, f"artiklid_eksport_{timestamp}.pdf")

    print(f"Loon PDF faili: {output_filename}")
    try:
        article_count = create_pdf_from_articles(articles, output_filename)
        size_kb = os.path.getsize(output_filename) / 1024
        print(banner)
        print(f"✅ VALMIS! Loodud PDF fail: {output_filename}")
        print(f" - Eksporditud artikleid: {article_count}")
        print(f" - Faili suurus: {size_kb:.1f} KB")
        print(banner)

        # Preview of the first three titles, cut to 60 characters.
        print("\nEsimesed artiklid:")
        for position, article in enumerate(articles[:3], start=1):
            title = article['title']
            preview = title if len(title) <= 60 else title[:60] + "..."
            print(f" {position}. {preview}")
    except Exception as e:
        print(f"\n❌ VIGA PDF loomisel: {e}")
        traceback.print_exc()


# Run the export only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|