-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathelperiodico.rb
93 lines (81 loc) · 2.27 KB
/
elperiodico.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
require 'tweakphoeus'
require 'nokogiri'
# Scraper for the "ultimas noticias" listing of elperiodico.com.
#
# Pages are downloaded through a Tweakphoeus client and parsed with Nokogiri.
# Each news item found is turned into a Hash with the string keys
# "date", "link", "title" and "desc".
class Elperiodico
  # Site root; item links found in the listing are appended to it.
  URL = "http://www.elperiodico.com"
  # First page of the listing; follow-up pages are built relative to it.
  URL_SOC = "#{URL}/es/ultimas-noticias/"
  # Number of listing pages to walk (the first page counts as page 1).
  N_PAGES = 22

  def initialize
    @http = Tweakphoeus::Client.new
  end

  # Walks the N_PAGES listing pages, printing every notice to stdout and
  # appending it (one per line) to 'notices_el_periodico.txt' in the current
  # directory.
  #
  # @return [Array<Hash>] every notice extracted, in page order
  def get_notices
    notices = []
    # Block form so the file is closed even if a request or parse step raises.
    File.open('notices_el_periodico.txt', 'a') do |f|
      page_urls.each do |page_url|
        # Each page is fetched exactly once and then fully processed.
        extract_notices(get_page(page_url)).each do |notice|
          puts notice
          f.puts notice
          notices << notice
        end
      end
    end
    notices
  end

  # Downloads +url+ and parses the response body as HTML.
  #
  # @param url [String] address to fetch
  # @return [Nokogiri::HTML::Document] parsed document
  def get_page url
    response = @http.get(url)
    Nokogiri::HTML(response.body)
  end

  # Overwrites "notices_elperiódico.txt" with the string form of +notices+.
  #
  # @param notices [Object] data to persist (written as given to IO#write)
  def save_info notices
    # TODO: save notices in files or ddbb
    File.open("notices_elperiódico.txt", "w") do |file|
      file.write(notices)
    end
  end

  private

  # URLs of all listing pages: the landing page followed by the numbered
  # "cmp-lst-last-news-<n>.inc" fragments for pages 2..N_PAGES.
  def page_urls
    [URL_SOC] + (2..N_PAGES).map { |number| "#{URL_SOC}cmp-lst-last-news-#{number}.inc" }
  end

  # Builds one notice Hash per '.item' inside the news container of +page+.
  # Items without a headline link are skipped instead of raising.
  def extract_notices(page)
    items = page.css('#cmp-list-last-news-container').css('.item')
    items.map { |item| build_notice(item) }.compact
  end

  # Converts a single listing item into a notice Hash, or nil when the item
  # has no 'h2 > a' anchor with an href to link to.
  def build_notice(item)
    anchor = item.at_css('h2 > a')
    href = anchor && anchor['href']
    return nil unless href

    {
      "date" => item.css('.fecha').text,
      "link" => URL + href,
      "title" => anchor['title'].to_s,
      "desc" => item.css('.p2').text
    }
  end
end