forked from liukunjun123/bill-tracker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bill-parse.py
39 lines (33 loc) · 1002 Bytes
/
bill-parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# -*- coding: utf-8 -*-
import requests, bs4
# URL pattern for the bill text pages of the 84th regular session (84R).
# ``number`` is the zero-padded bill number; ``stage`` is the version suffix,
# which changes depending on what stage the bill is at ("I" is the suffix the
# original script used).
BILL_URL_TEMPLATE = (
    "http://www.capitol.state.tx.us/tlodocs/84R/billtext/html/"
    "SB{number}{stage}.htm"
)

# Layout characters that should not appear in the generated markup.
BAD_CHARS = ['\n', '\xa0', '\r', '\t']


def build_bill_url(bill_number, stage="I"):
    """Return the bill text URL for *bill_number* at the given *stage*.

    The number is left-padded with zeros to five digits. The previous
    hard-coded ``"SB000"`` prefix only produced a five-digit number for
    two-digit bills; for those inputs the resulting URL is unchanged.
    NOTE(review): five-digit padding is inferred from that prefix -- confirm
    against the site for bills outside the two-digit range.
    """
    return BILL_URL_TEMPLATE.format(number=bill_number.zfill(5), stage=stage)


def wrap_in_spans(segments):
    """Wrap every string in *segments* in ``<span>`` tags and concatenate."""
    return ''.join('<span>' + segment + '</span>' for segment in segments)


def replace_bad_chars(text):
    """Return *text* with every character in ``BAD_CHARS`` turned into a space.

    ``str.strip`` only trims the two ends of a string, so it never touched
    the characters inside the markup (which always starts with ``<span>``).
    ``str.translate`` handles every occurrence in one pass. A space is used
    instead of deletion so words separated only by a line break stay apart.
    """
    return text.translate(str.maketrans({char: ' ' for char in BAD_CHARS}))


def main():
    """Prompt for a bill number, download its text and print it as spans.

    Exits with status 1 (after printing a message) when the input is not a
    number or when the server does not answer with HTTP 200.
    """
    bill_number = input('bill number').strip()
    # isalnum() accepted letters; the message promises a number.
    if not (bill_number.isascii() and bill_number.isdigit()):
        print('error must be a number')
        raise SystemExit(1)

    # A timeout keeps the script from hanging forever on a dead connection.
    res = requests.get(build_bill_url(bill_number), timeout=30)
    if res.status_code != requests.codes.ok:
        print('not a vaild bill!')
        # Stop here: the body of an error page is not a bill.
        raise SystemExit(1)

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    segments = soup.get_text().split('.')

    # Index of the last segment, printed as in the original script.
    print(len(segments) - 1)
    print(replace_bad_chars(wrap_in_spans(segments)))


if __name__ == "__main__":
    main()