Skip to content

Commit

Permalink
Made minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhil25803 committed May 16, 2024
1 parent 19e2642 commit 1658a88
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 299 deletions.
41 changes: 26 additions & 15 deletions src/scrape_up/ambitionBox/company.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,16 @@
from bs4 import BeautifulSoup


class Comapiens:
def __init__(self,num_pages: int=1):
class Comapiens:
def __init__(self, num_pages: int = 1) -> None:
    """Create a scraper configured with a page count.

    Args:
        num_pages: Stored unchanged as ``self.num_pages``. Presumably the
            number of listing pages to fetch -- confirm against the
            scraping method that reads it. Defaults to 1.
    """
    self.num_pages = num_pages

def write_sorted_list(self, file, company_list):
    """Write companies to ``file``, highest rating first.

    One line is written per entry, in the form ``"<name> <rating>\\n"``,
    with surrounding whitespace stripped from the name.

    Args:
        file: Any object exposing ``write(str)`` (an open text file,
            ``io.StringIO``, ...).
        company_list: Iterable of ``(company_name, rating)`` pairs. It is
            read but never modified.
    """
    # sorted() returns a new list, so the caller's sequence keeps its
    # original order (list.sort() would reorder it in place as a hidden
    # side effect). The sort is stable, so entries with equal ratings stay
    # in their input order even with reverse=True.
    ranked = sorted(company_list, key=lambda entry: entry[1], reverse=True)
    for company_name, rating in ranked:
        file.write(f"{company_name.strip()} {rating}\n")

def scrape_companies(self):


headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}
Expand All @@ -26,15 +23,19 @@ def scrape_companies(self):
response = requests.get(url, headers=headers)

if response.status_code == 200:
soup = BeautifulSoup(response.text, 'lxml')
soup = BeautifulSoup(response.text, "lxml")

companies = soup.find_all('div', class_="companyCardWrapper")
companies = soup.find_all("div", class_="companyCardWrapper")

company_ratings = []

for company in companies:
company_name = company.find('h2', class_="companyCardWrapper__companyName").text.strip()
company_star = company.find('span', class_="companyCardWrapper__companyRatingValue")
company_name = company.find(
"h2", class_="companyCardWrapper__companyName"
).text.strip()
company_star = company.find(
"span", class_="companyCardWrapper__companyRatingValue"
)

if company_name and company_star:
try:
Expand All @@ -46,24 +47,34 @@ def scrape_companies(self):
with open("src/scrape_up/ambitionBox/company_ratings.txt", "a") as f:
f.write(f"\nPAGE: {url}\n")
f.write("COMPANY UNDER 5 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 4 < r[1] <= 5])
self.write_sorted_list(
f, [r for r in company_ratings if 4 < r[1] <= 5]
)

f.write("\nCOMPANY UNDER 4 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 3 < r[1] <= 4])
self.write_sorted_list(
f, [r for r in company_ratings if 3 < r[1] <= 4]
)

# Corrected indentation for following lines
f.write("\nCOMPANY UNDER 3 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 2 < r[1] <= 3])
self.write_sorted_list(
f, [r for r in company_ratings if 2 < r[1] <= 3]
)

f.write("\nCOMPANY UNDER 2 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 1 < r[1] <= 2])
self.write_sorted_list(
f, [r for r in company_ratings if 1 < r[1] <= 2]
)

f.write("\nCOMPANY UNDER 1 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 0 < r[1] <= 1])
self.write_sorted_list(
f, [r for r in company_ratings if 0 < r[1] <= 1]
)
else:
print(f"Error scraping page {page}: {response.status_code}")


# Script entry point: build a scraper with num_pages=10 and run it.
if __name__ == "__main__":
    # NOTE(review): the constructor call appears twice; this looks like the
    # before/after pair of a diff line rather than intent -- confirm and
    # keep only one.
    c = Comapiens(10)
    c = Comapiens(10)
    c.scrape_companies()
Loading

0 comments on commit 1658a88

Please sign in to comment.