import json
import logging
import os
import re
from datetime import datetime
from typing import Any, Dict, List

from .config import config

# Module-level logger shared by the helpers below; kept private so that
# `from <module> import *` does not export it.
_logger = logging.getLogger(__name__)

# Patterns used by clean_filename(), compiled once at import time.
_DISALLOWED_CHARS = re.compile(r'[^\w\s\.\-]')
_WHITESPACE_RUN = re.compile(r'\s+')


def setup_logging() -> logging.Logger:
    """Configure logging to a timestamped file and to the console.

    The log file is created inside ``config.log_dir`` and is named
    ``processing_<YYYYmmdd_HHMMSS>.log``. The directory is created first if
    it is missing, because ``logging.FileHandler`` does not create parent
    directories.

    Returns:
        The logger named after this module.
    """
    os.makedirs(config.log_dir, exist_ok=True)
    log_file = os.path.join(
        config.log_dir,
        f"processing_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log",
    )

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file, encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )

    return logging.getLogger(__name__)


def save_processed_article(data: Dict[str, Any]) -> None:
    """Save a processed article as a JSON file in ``config.processed_dir``.

    The file is named ``article_<YYYYmmdd_HHMMSS>.json``. If a file with that
    name already exists (several saves within the same second), a numeric
    suffix ``_1``, ``_2``, ... is appended instead of overwriting it.

    Failures are logged and swallowed; this function never raises for
    ``Exception`` subclasses.

    Args:
        data: JSON-serializable mapping describing the article.
    """
    try:
        # Serialize before touching the filesystem so that a non-serializable
        # payload cannot leave a truncated or empty .json file behind.
        payload = json.dumps(data, ensure_ascii=False, indent=2)

        os.makedirs(config.processed_dir, exist_ok=True)

        # Build the file name; fall back to a numeric suffix on collision.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        stem = f"article_{timestamp}"
        filepath = os.path.join(config.processed_dir, f"{stem}.json")
        suffix = 1
        while os.path.exists(filepath):
            filepath = os.path.join(
                config.processed_dir, f"{stem}_{suffix}.json"
            )
            suffix += 1

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)

        _logger.info(f"Töödeldud andmed salvestatud: {filepath}")

    except Exception as e:
        # Best-effort boundary: report the problem but do not propagate it.
        _logger.error(f"Viga andmete salvestamisel: {str(e)}")


def load_processed_articles() -> List[Dict]:
    """Load every processed article stored in ``config.processed_dir``.

    Files whose names end with ``.json`` are read in sorted file-name order,
    so the result order is deterministic. Files that cannot be read or do
    not contain valid JSON are skipped with a warning.

    Returns:
        The decoded content of each readable JSON file; an empty list when
        the directory does not exist or holds no readable JSON files.
    """
    articles: List[Dict] = []

    if not os.path.isdir(config.processed_dir):
        return articles

    for filename in sorted(os.listdir(config.processed_dir)):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(config.processed_dir, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                articles.append(json.load(f))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Unlike a bare `except`, this lets KeyboardInterrupt and
            # SystemExit propagate.
            _logger.warning(
                f"Viga faili lugemisel, jäetakse vahele: {filepath}: {e}"
            )

    return articles


def clean_filename(filename: str) -> str:
    """Strip special characters from a file name.

    Keeps only word characters (letters, digits, underscore), whitespace,
    dots and hyphens; collapses every run of whitespace into a single space;
    and trims leading and trailing whitespace.

    Args:
        filename: The raw file name.

    Returns:
        The cleaned file name (may be empty if nothing allowed remains).
    """
    # Drop everything except word characters, whitespace, dots and hyphens.
    clean = _DISALLOWED_CHARS.sub('', filename)
    # Replace runs of whitespace with one space.
    clean = _WHITESPACE_RUN.sub(' ', clean)
    return clean.strip()