diff --git a/dev-documentation.md b/dev-documentation.md
index b4d3d631..f23f1818 100644
--- a/dev-documentation.md
+++ b/dev-documentation.md
@@ -1615,6 +1615,30 @@ First create an object of class `Dictionary`.
 | `.get_word_of_the_day()` | Returns the word of the day. |
 | `.word_of_the_day_definition()` | Returns the definition of the word of the day. |
 
+--------
+
+
+#### AmbitionBox
+
+Create a directory with the name ambitionbox.
+Create a Python file containing the code for scraping the website.
+
+```python
+# Example usage
+from scrape_up import ambitionBox
+
+num_pages_to_scrape = 2
+
+scraper = ambitionBox.Comapiens(num_pages_to_scrape)
+
+scraper.scrape_companies()
+
+```
+
+| Methods | Details |
+| --------------- | ----------------------------------------------------------------------------- |
+| `.scrape_companies()` | Returns the company name with the rating. |
+
 ---
 
 ## Geeksforgeeks
diff --git a/documentation.md b/documentation.md
index 095e1260..1dcf24b0 100644
--- a/documentation.md
+++ b/documentation.md
@@ -733,3 +733,4 @@ boxoffice = imdb.BoxOffice()
 | Methods | Details |
 | --------------- | ------------------------------------------------------------------------------ |
 | `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released.|
+
diff --git a/src/scrape_up/ambitionBox/company.py b/src/scrape_up/ambitionBox/company.py
new file mode 100644
index 00000000..49f30251
--- /dev/null
+++ b/src/scrape_up/ambitionBox/company.py
@@ -0,0 +1,71 @@
+import requests
+from bs4 import BeautifulSoup
+
+
+class Comapiens:
+    """Scrape company names and ratings from ambitionbox.com list pages.
+
+    NOTE: the class-name spelling matches the documented public API
+    (``ambitionBox.Comapiens``) and must not be changed silently.
+    """
+
+    def __init__(self, num_pages: int = 1):
+        self.num_pages = num_pages
+
+    def write_sorted_list(self, file, company_list):
+        """Write (name, rating) pairs to *file*, highest rated first."""
+        company_list.sort(key=lambda x: x[1], reverse=True)
+        for company_name, rating in company_list:
+            file.write(f"{company_name.strip()} {rating}\n")
+
+    def scrape_companies(self):
+        """Scrape ``self.num_pages`` pages and append grouped ratings to a text file."""
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
+        }
+
+        for page in range(1, self.num_pages + 1):
+            print(f"Scraping webpage number: {page} of {self.num_pages}")
+
+            url = f"https://www.ambitionbox.com/list-of-companies?page={page}"
+            # A timeout prevents the scraper from hanging forever on a stalled connection.
+            response = requests.get(url, headers=headers, timeout=10)
+
+            if response.status_code != 200:
+                print(f"Error scraping page {page}: {response.status_code}")
+                continue
+
+            soup = BeautifulSoup(response.text, "lxml")
+            companies = soup.find_all("div", class_="companyCardWrapper")
+
+            company_ratings = []
+            for company in companies:
+                # Check the tags exist before reading .text to avoid an AttributeError
+                # when a card is missing its name or rating element.
+                name_tag = company.find("h2", class_="companyCardWrapper__companyName")
+                star_tag = company.find("span", class_="companyCardWrapper__companyRatingValue")
+
+                if name_tag and star_tag:
+                    company_name = name_tag.text.strip()
+                    try:
+                        rating = float(star_tag.text)
+                        company_ratings.append((company_name, rating))
+                    except ValueError:
+                        print(f"Error parsing rating for company: {company_name}")
+
+            with open("src/scrape_up/ambitionBox/company_ratings.txt", "a") as f:
+                f.write(f"\nPAGE: {url}\n")
+                f.write("COMPANY UNDER 5 STAR\n")
+                self.write_sorted_list(f, [r for r in company_ratings if 4 < r[1] <= 5])
+
+                f.write("\nCOMPANY UNDER 4 STAR\n")
+                self.write_sorted_list(f, [r for r in company_ratings if 3 < r[1] <= 4])
+
+                f.write("\nCOMPANY UNDER 3 STAR\n")
+                self.write_sorted_list(f, [r for r in company_ratings if 2 < r[1] <= 3])
+
+                f.write("\nCOMPANY UNDER 2 STAR\n")
+                self.write_sorted_list(f, [r for r in company_ratings if 1 < r[1] <= 2])
+
+                f.write("\nCOMPANY UNDER 1 STAR\n")
+                self.write_sorted_list(f, [r for r in company_ratings if 0 < r[1] <= 1])
+
+
+if __name__ == "__main__":
+    c = Comapiens(10)
+    c.scrape_companies()
diff --git a/src/scrape_up/ambitionBox/company_ratings.txt b/src/scrape_up/ambitionBox/company_ratings.txt
new file mode 100644
index 00000000..e69de29b