|
|
@@ -26,6 +26,28 @@ ULTRA_COMPACT_QUERY = {
|
|
|
"""
|
|
|
}
|
|
|
|
|
|
def fix_broken_words(text):
    """Rejoin words that were split by a line break and flatten to one line.

    The steps run in this order:

    1. ``\\r\\n`` and lone ``\\r`` are normalised to ``\\n`` so the rules
       below also work on Windows / old-Mac line endings.
    2. A newline sitting directly between two runs of word characters
       (at least 3 before it, at least 2 after it) is removed, gluing the
       two fragments together.
    3. Every remaining newline, together with the whitespace around it,
       is replaced by a single space.
    4. The literal ``'trac ow'`` is replaced by ``'traffic flow'``.

    Examples:
        "trac\\nows"      -> "tracows"
        "vähe\\nmusest"   -> "vähemusest"
        "vähe\\r\\nmusest" -> "vähemusest"

    Args:
        text: The string to clean. Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        The cleaned string, containing no ``\\n`` or ``\\r`` characters.
    """
    if not text:
        return text

    # Normalise line endings first; otherwise a '\r' sits between the word
    # and the '\n', the join below never matches, and the '\r' leaks into
    # the output.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Pattern 1: word fragment + newline + word fragment -> joined word.
    # NOTE: the pattern is unanchored, so a longer run of word characters
    # still matches on its last 20 / first 10 characters; in practice only
    # the minimum lengths (3 and 2) restrict what gets joined.
    text = re.sub(r'(\w{3,20})\n(\w{2,10})', r'\1\2', text)

    # Pattern 2: any newline that is left, plus the whitespace on both
    # sides of it, becomes exactly one space (no double spaces).
    text = re.sub(r'\s*\n\s*', ' ', text)

    # Known extraction artefact picked up from the data.
    # NOTE(review): presumably the "ffi" and "fl" ligatures were dropped
    # when the text was extracted - TODO confirm the root cause.
    text = text.replace('trac ow', 'traffic flow')

    return text
|
|
|
+
|
|
|
def fetch_articles():
|
|
|
"""Toob artiklid"""
|
|
|
print("📡 Toon artikleid (title, source_file, summary_et)...")
|
|
|
@@ -118,12 +140,12 @@ def extract_research_question(summary_et):
|
|
|
"""
|
|
|
if not summary_et:
|
|
|
return "N/A"
|
|
|
-
|
|
|
+
|
|
|
# Otsi "Uurimisküsimused ja eesmärgid:" sektsiooni
|
|
|
patterns = [
|
|
|
r'(?:^|\n)\s*(?:[-•*•]\s+)?\*{0,2}Uurimisküsimused ja eesmärgid:\*{0,2}\s*(.+?)(?=(?:^|\n)\s*(?:[-•*•]\s+)?\*{0,2}Teaduslik tähtsus:|$)',
|
|
|
]
|
|
|
-
|
|
|
+
|
|
|
text = None
|
|
|
for pattern in patterns:
|
|
|
match = re.search(pattern, summary_et, re.DOTALL | re.IGNORECASE)
|
|
|
@@ -170,21 +192,68 @@ def generate_markdown(articles):
|
|
|
"""Genereerib Markdown tabel"""
|
|
|
md_path = "/home/ardo/Downloads/articles_ultra_compact.md"
|
|
|
|
|
|
+ # ✅ V3.6: CSS styling Markdown failis
|
|
|
md_content = f"""# Teadusartiklite ultra-kompaktne nimekiri
|
|
|
|
|
|
+<style type="text/css">
|
|
|
+@page {{
|
|
|
+ size: landscape; /* ✅ A4 11" x 8.5" */
|
|
|
+ margin: 10mm;
|
|
|
+}}
|
|
|
+body {{
|
|
|
+ margin: 0;
|
|
|
+ padding: 20px;
|
|
|
+ width: 100%;
|
|
|
+}}
|
|
|
+table {{
|
|
|
+ width: 100%;
|
|
|
+ table-layout: fixed;
|
|
|
+}}
|
|
|
+
|
|
|
+table th:nth-child(1),
|
|
|
+table td:nth-child(1) {{
|
|
|
+ width: 5%;
|
|
|
+}}
|
|
|
+
|
|
|
+table th:nth-child(2),
|
|
|
+table td:nth-child(2) {{
|
|
|
+ width: 20%;
|
|
|
+}}
|
|
|
+
|
|
|
+table th:nth-child(3),
|
|
|
+table td:nth-child(3) {{
|
|
|
+ width: 15%;
|
|
|
+}}
|
|
|
+
|
|
|
+table th:nth-child(4),
|
|
|
+table td:nth-child(4) {{
|
|
|
+ width: 60%;
|
|
|
+}}
|
|
|
+
|
|
|
+table td {{
|
|
|
+ word-wrap: break-word;
|
|
|
+ overflow-wrap: break-word;
|
|
|
+}}
|
|
|
+</style>
|
|
|
+
|
|
|
**Kokku artikle:** {len(articles)}
|
|
|
**Eksporditud:** {datetime.now().strftime("%d.%m.%Y %H:%M")}
|
|
|
|
|
|
**Väljad:**
|
|
|
-1. Pealkiri
|
|
|
-2. Allikfail
|
|
|
-3. Uurimisküsimus (esimene 200 tähemärki "Uurimisküsimused ja eesmärgid" sektsoonist, ilma markdown loetelu markerita)
|
|
|
+1. \# (5% laiusest)
|
|
|
+2. Pealkiri (20% laiusest)
|
|
|
+3. Allikfail (15% laiusest)
|
|
|
+4. Uurimisküsimus (60% laiusest)
|
|
|
|
|
|
---
|
|
|
|
|
|
-| # | Pealkiri | Allikfail | Uurimisküsimus |
|
|
|
-|---|----------|-----------|----------------|
|
|
|
"""
|
|
|
+
|
|
|
+ # Line-by-line ehitus
|
|
|
+ lines = [
|
|
|
+ "| # | Pealkiri | Allikfail | Uurimisküsimus |",
|
|
|
+ "|---|----------|-----------|----------------|",
|
|
|
+ ]
|
|
|
|
|
|
for i, article in enumerate(articles, 1):
|
|
|
title = article.get('title', 'N/A')
|
|
|
@@ -194,10 +263,18 @@ def generate_markdown(articles):
|
|
|
|
|
|
# Markdown-safe (eemalda |)
|
|
|
title = title.replace('|', '-')
|
|
|
+ #title = title.replace('trac ow', 'traffic flow')
|
|
|
+ title = clean_markdown_lists(title)
|
|
|
+ # ✅ OLULINE: Esimene parandame poolitatus sõnad
|
|
|
+ title = fix_broken_words(title)
|
|
|
research_q = research_q.replace('|', '-')
|
|
|
-
|
|
|
- md_content += f"| {i} | {title} | `{source}` | {research_q} |\n"
|
|
|
-
|
|
|
+ # ✅ Puhas rida: ilma trailing spaces'ita
|
|
|
+ row = f"| {i} | {title} | {source} | {research_q} |"
|
|
|
+ lines.append(row)
|
|
|
+
|
|
|
+ md_content += "\n".join(lines)
|
|
|
+
|
|
|
+
|
|
|
md_content += f"""
|
|
|
---
|
|
|
|
|
|
@@ -227,6 +304,7 @@ def generate_markdown(articles):
|
|
|
print(f"❌ Viga Markdown loomisel: {e}")
|
|
|
return None
|
|
|
|
|
|
+
|
|
|
def generate_html(articles):
|
|
|
"""Genereerib HTML tabel"""
|
|
|
html_path = "/home/ardo/Downloads/articles_ultra_compact.html"
|