#!/usr/bin/env python
"""
Elcorreo.com
"""
# Module-level metadata for this recipe file.
__license__ = 'GPL v3'
__author__ = 'Carlos Marten based on Kovid Goyal official version'
# The copyright notice must be a single-line literal: a single-quoted string
# cannot contain a raw line break.
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
description = 'Elcorreo Newspaper (Spain) - v1.0 16.04.2022'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
from html5_parser import parse
import datetime
from datetime import date
class Elcorreo(BasicNewsRecipe):
    """Recipe for the El Correo (elcorreo.com) front-page RSS feed.

    The class body only sets recipe options (feed list, tag filters, CSS,
    cover image URL) and overrides two HTML hooks:

    * ``preprocess_html`` replaces text-only links with their plain text.
    * ``postprocess_html`` returns the document unchanged.

    Note that ``title``, ``now`` and ``cover_url`` are evaluated once, when
    the class body is executed, not on every download.
    """

    __author__ = 'Carlos Marten'
    description = 'Elcorreo'
    now = datetime.datetime.now()
    title = u'El Correo ['+str(date.today())+']'
    publisher = u'Ediciones El Pa\xeds SL'
    category = 'News, politics, culture, economy, general interest'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 5
    max_articles_per_feed = 4
    # NOTE(review): calibre's documented option is spelled ``recursions``;
    # ``recursion`` is presumably ignored by the framework. Left untouched
    # because renaming it would change what gets downloaded -- confirm intent.
    recursion = 2
    no_stylesheets = True
    remove_attributes = ['width', 'height', 'display', 'margin', 'padding', 'position', 'border']
    remove_javascript = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    compress_news_images = False
    #auto_cleanup = True
    #scale_news_images_to_device = True

    def getcoverurl():
        """Return the URL of today's front-page image.

        This is a plain helper that takes no arguments; it is called once,
        directly below, while the class body runs, to compute ``cover_url``.
        """
        now = datetime.datetime.now()
        # Zero-pad the month with a format spec. The previous
        # '/0' + str(now.month) produced a three-digit month ('010'..'012')
        # from October to December.
        # NOTE(review): the day is not zero-padded, exactly as before --
        # confirm against the URL scheme the image server really uses.
        return 'https://portada.iperiodico.es/{}/{:02d}/{}_elcorreo.750.jpg'.format(
            now.year, now.month, now.day)

    cover_url = getcoverurl()

    def preprocess_html(self, soup):
        """Replace every ``<a>`` that holds a single string with that string.

        Links whose ``.string`` is ``None`` are left in place.

        :param soup: parsed document handed over by the framework.
        :return: the same ``soup`` object, modified in place.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    extra_css = '''
        img{
            all: initial;
            width: 100%
        }
        h1 { font-size: 22px }
        h2 { font-size: 20px }
    '''

    keep_only_tags = [
        dict(name='h1', attrs={'class': [
            'v-a-t',  # title
        ]}),
        dict(name='h2', attrs={'class': [
            'v-a-sub-t',  # subtitle
        ]}),
        dict(name='script', attrs={'type': 'application/ld+json',}),  # json with article (closed)
        dict(name='article', attrs={'class': [
            'v-a v-a--d v-a--d-bs v-a--p-b',  # article
        ]}),
        dict(name='div', attrs={'class': [
            'amp-access-hide',  # article (closed)
        ]}),
    ]

    remove_tags = [
        dict(attrs={'class': [
            'v-drpw__w',  # social
            'v-mdl-tpc',  # section topics related
            'content-exclusive-bg',  # paywall
            'v-d__btn-c',  # "comment" and "report an error" buttons
            'v-i-b',  # share
            'v-pill-m',  # play icon and enlarge-image icon
            'v-mdl-ath__c',  # comments
        ]},),
        dict(attrs={'class': [
            'v-a-img',  # image
        ]},),
    ]

    def postprocess_html(self, soup, first):
        """Return ``soup`` unchanged; no post-processing is applied.

        :param soup: parsed document handed over by the framework.
        :param first: unused.
        :return: ``soup`` as received.
        """
        return soup

    feeds = [
        (u'Portada', u'https://www.elcorreo.com/rss/2.0/portada/'),
    ]

    calibre_most_common_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'