-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_taxrates.py
59 lines (45 loc) · 1.78 KB
/
get_taxrates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
This code outputs the federal income tax rates and brackets
which is scraped from the IRS web page.
"""
last_updated = "5/15/2024"
import requests
from bs4 import BeautifulSoup
import pandas as pd
url = 'https://www.irs.gov/filing/federal-income-tax-rates-and-brackets'
r = requests.get( url )
# Parsing the HTML
soup = BeautifulSoup(r.content, 'html.parser')
table = soup.find_all('table', class_ = "table complex-table table-striped table-bordered table-responsive")
# Title for tables
year_soup = soup.find_all('div', class_ = "field field--name-body field--type-text-with-summary field--label-hidden field--item")[1]
year_txt = year_soup.find( 'h2' )
year = year_txt.text.strip().split()[0]
data_title = [
f"{year} tax rates for \033[4ma single taxpayer\033[0m",
f"{year} tax rates for \033[4ma married filing jointly or qualifying surviving spouse\033[0m",
f"{year} tax rates for \033[4ma married filing separately\033[0m",
f"{year} tax rates for \033[4ma head of household\033[0m",
]
# Scrape data for each table on website (four)
for i in range(len(table)):
# Select corresponding table
cur_table = table[i]
col_titles = cur_table.find_all('th')
col_table_titles = [ title.text.strip() for title in col_titles ]
# print( col_table_titles )
# Create empty dataframe
df = pd.DataFrame(columns = col_table_titles)
col_data = cur_table.find_all("tr")
for row in col_data[1:]:
row_data = row.find_all('td')
indiv_row_data = [ data.text.strip() for data in row_data ]
# print( indiv_row_data )
# Input data in each row
length = len(df)
df.loc[length] = indiv_row_data
print( data_title[i] )
print( df )
print()
# Print latest updated date
print( f"Last updated: {last_updated}")