| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- import os
- import json
- import logging
- from datetime import datetime
- from typing import Dict, Any, List
- from .config import config
def setup_logging():
    """Configure root logging to a timestamped file and to the console.

    The log file is placed in ``config.log_dir`` and is named
    ``processing_<YYYYmmdd_HHMMSS>.log``. Records at level INFO and above
    are written both to that file (UTF-8) and to a stream handler.

    Calling this function again after the root logger has been configured
    is a no-op apart from returning the logger, matching the behaviour of
    ``logging.basicConfig`` without ``force=True``.

    Returns:
        logging.Logger: The logger named after this module.
    """
    # basicConfig ignores its arguments once the root logger has handlers,
    # so building a FileHandler in that case would only leave behind an
    # empty log file and an open file handle that nobody uses.
    if not logging.getLogger().handlers:
        # FileHandler does not create missing parent directories.
        # An empty log_dir means "current directory" and needs no creation.
        if config.log_dir:
            os.makedirs(config.log_dir, exist_ok=True)

        started_at = datetime.now().strftime('%Y%m%d_%H%M%S')
        log_file = os.path.join(config.log_dir, f"processing_{started_at}.log")

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file, encoding='utf-8'),
                logging.StreamHandler(),
            ],
        )

    return logging.getLogger(__name__)
def save_processed_article(data: Dict[str, Any]):
    """Save a processed article as a JSON file in ``config.processed_dir``.

    The file is named ``article_<YYYYmmdd_HHMMSS>.json``. If a file with
    that name already exists (another article saved within the same
    second), a numeric suffix ``_1``, ``_2``, ... is appended instead of
    overwriting the earlier article.

    This is a best-effort operation: any failure is logged as an error and
    not propagated to the caller.

    Args:
        data: JSON-serializable article data.
    """
    logger = logging.getLogger(__name__)
    try:
        # Serialize before touching the file system so that data which is
        # not JSON-serializable cannot leave a truncated file behind.
        payload = json.dumps(data, ensure_ascii=False, indent=2)

        # An empty processed_dir means "current directory".
        if config.processed_dir:
            os.makedirs(config.processed_dir, exist_ok=True)

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        attempt = 0
        while True:
            if attempt == 0:
                filename = f"article_{timestamp}.json"
            else:
                filename = f"article_{timestamp}_{attempt}.json"
            filepath = os.path.join(config.processed_dir, filename)
            try:
                # Mode 'x' fails instead of overwriting an existing file.
                with open(filepath, 'x', encoding='utf-8') as f:
                    f.write(payload)
                break
            except FileExistsError:
                attempt += 1

        logger.info(f"Töödeldud andmed salvestatud: {filepath}")

    except Exception as e:
        logger.error(f"Viga andmete salvestamisel: {str(e)}")
def load_processed_articles() -> List[Dict]:
    """Load all processed articles stored as JSON in ``config.processed_dir``.

    Every file whose name ends in ``.json`` is parsed. Files that cannot be
    read or decoded are skipped with a logged warning. Files are processed
    in sorted filename order so the result is deterministic.

    Returns:
        The parsed contents of each readable JSON file; an empty list when
        the directory does not exist.
    """
    logger = logging.getLogger(__name__)

    try:
        # os.listdir returns entries in arbitrary order; sort for stability.
        filenames = sorted(os.listdir(config.processed_dir))
    except FileNotFoundError:
        # Nothing has been saved yet.
        return []

    articles = []
    for filename in filenames:
        if not filename.endswith('.json'):
            continue
        filepath = os.path.join(config.processed_dir, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                articles.append(json.load(f))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # propagate.
            logger.warning("Viga faili laadimisel (%s): %s", filepath, exc)

    return articles
def clean_filename(filename: str) -> str:
    """Strip special characters from a file name.

    Only word characters (letters, digits, underscore), whitespace, dots
    and hyphens are kept. Each run of whitespace is then collapsed into a
    single space, and leading/trailing whitespace is removed.
    """
    import re

    # Drop everything that is not a word character, whitespace, '.' or '-'.
    kept_only = re.sub(r'[^\w\s\.\-]', '', filename)
    # Splitting on whitespace and re-joining with single spaces collapses
    # every whitespace run and trims both ends in one step.
    return ' '.join(kept_only.split())
|