diff --git a/.github/workflows/greetings.yaml b/.github/workflows/greetings.yaml new file mode 100644 index 00000000..ca8ec8bd --- /dev/null +++ b/.github/workflows/greetings.yaml @@ -0,0 +1,16 @@ +name: Greetings + +on: [issues, pull_request_target] + +jobs: + greeting: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/first-interaction@v1 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + issue-message: "Hi there! Thanks for opening this issue. We appreciate your contribution to this open-source project. We aim to respond or assign your issue as soon as possible." + pr-message: "Welcome to our repository! 🎊 Thank you so much for taking the time to contribute." diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 0d7c2856..2222b7f6 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -2,127 +2,76 @@ ## Our Pledge -We as members, contributors, and leaders pledge to make participation in our -community a harassment-free experience for everyone, regardless of age, body -size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. +We, as members, contributors, and leaders, pledge to create a harassment-free experience for everyone in our community. We welcome individuals regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. -We pledge to act and interact in ways that contribute to an open, welcoming, -diverse, inclusive, and healthy community. +We commit to fostering an environment of empathy, kindness, diversity, inclusivity, and health. 
## Our Standards -Examples of behavior that contributes to a positive environment for our -community includes: +Examples of behavior that contributes to a positive environment include: -- Demonstrating empathy and kindness toward other people -- Being respectful of differing opinions, viewpoints, and experiences -- Giving and gracefully accepting constructive feedback -- Accepting responsibility and apologizing to those affected by our mistakes, - and learning from the experience -- Focusing on what is best not just for us as individuals, but for the - overall community +- Demonstrating empathy and kindness towards others +- Respecting differing opinions, viewpoints, and experiences +- Giving and accepting constructive feedback gracefully +- Taking responsibility for our mistakes, apologizing to those affected, and learning from the experience +- Considering the well-being of the entire community, not just ourselves Examples of unacceptable behavior include: -- The use of sexualized language or imagery, and sexual attention or - advances of any kind -- Trolling, insulting or derogatory comments, and personal or political attacks -- Public or private harassment -- Publishing others' private information, such as a physical or email - address, without their explicit permission -- Other conduct which could reasonably be considered inappropriate in a - professional setting +- Use of sexualized language, imagery, or advances +- Trolling, insults, derogatory comments, personal attacks, or political attacks +- Harassment, whether public or private +- Publishing others' private information without explicit permission +- Any conduct that could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities -Community leaders are responsible for clarifying and enforcing our standards of -acceptable behavior and will take appropriate and fair corrective action in -response to any behavior that they deem inappropriate, threatening, offensive, -or harmful. - -Community leaders have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are -not aligned to this Code of Conduct, and will communicate reasons for moderation -decisions when appropriate. +Community leaders are responsible for enforcing these standards and will take fair corrective action in response to inappropriate behavior. They have the right to remove, edit, or reject contributions that do not align with this Code of Conduct, and will communicate reasons for their actions. ## Scope -This Code of Conduct applies within all community spaces and also applies when -an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, -posting via an official social media account, or acting as an appointed -representative at an online or offline event. +This Code of Conduct applies to all community spaces and when individuals are officially representing the community in public. This includes using official e-mail addresses, posting via official social media accounts, or acting as appointed representatives at events. ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at -Gmail. -All complaints will be reviewed and investigated promptly and fairly. 
+Report instances of abusive, harassing, or otherwise unacceptable behavior to the community leaders responsible for enforcement at [contact email]. All complaints will be promptly and fairly reviewed and investigated. -All community leaders are obligated to respect the privacy and security of the -reporter of any incident. +Community leaders are required to respect the privacy and security of incident reporters. ## Enforcement Guidelines -Community leaders will follow these Community Impact Guidelines in determining -the consequences for any action they deem in violation of this Code of Conduct: +Community leaders will use these guidelines to determine consequences for violations: ### 1. Correction -**Community Impact**: Use of inappropriate language or other behavior deemed -unprofessional or unwelcome in the community. +**Community Impact**: Use of inappropriate language or behavior deemed unprofessional or unwelcome. -**Consequence**: A private, written warning from community leaders, providing -clarity around the nature of the violation and an explanation of why the -behavior was inappropriate. A public apology may be requested. +**Consequence**: A private, written warning from community leaders, explaining the violation and why it was inappropriate. A public apology may be requested. ### 2. Warning -**Community Impact**: A violation through a single incident or series -of actions. +**Community Impact**: A violation through a single incident or series of actions. -**Consequence**: A warning with consequences for continued behavior. No -interaction with the people involved, including unsolicited interaction with -those enforcing the Code of Conduct, for a specified period. This -includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or -permanent ban. +**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved for a specified period, including in community spaces and external channels. Violating this may lead to a temporary or permanent ban. ### 3. Temporary Ban -**Community Impact**: A serious violation of community standards, including -sustained inappropriate behavior. +**Community Impact**: A serious violation of community standards. -**Consequence**: A temporary ban from any sort of interaction or public -communication with the community for a specified period. No public or -private interaction with the people involved, including unsolicited interaction -with those enforcing the Code of Conduct, is allowed during this period. -Violating these terms may lead to a permanent ban. +**Consequence**: A temporary ban from any interaction or public communication with the community. No interaction with the people involved is allowed, including unsolicited contact. Violating this may lead to a permanent ban. ### 4. Permanent Ban -**Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an -individual, or aggression toward or disparagement of classes of individuals. +**Community Impact**: Demonstrating a pattern of violation of community standards. -**Consequence**: A permanent ban from any sort of public interaction within -the community. +**Consequence**: A permanent ban from any public interaction within the community. 
## Attribution -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. -Community Impact Guidelines were inspired by [Mozilla's code of conduct -enforcement ladder](https://github.com/mozilla/diversity). +Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org -For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. +For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4bba9679..952030ee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,7 +71,7 @@ pip install -r requirements.txt Now you are done with the project setup, now you can make the changes you want or assign. -### Let's say you want to scrape the avatar URL of and user. Steps applying which we can do this +### Let's say you want to scrape the avatar URL of a user. The steps to apply in order to achieve this are as follows: - At first, we have to scrape the profile page of a user. For that, we have defined a function in the user class as @@ -84,7 +84,7 @@ class Users: def __scrape_page(self): username = self.username - data = requests.get(f"https://github.com/{username}") + data = requests.get(f"https://github.com/{username}") data = BeautifulSoup(data.text, "html.parser") return data ``` diff --git a/dev-documentation.md b/dev-documentation.md index 624696a8..7ff252de 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -774,10 +774,10 @@ posts = Devpost() ``` | Methods | Details | -| ----------------- | -------------------------------------------------------------------------------------------------------------------- | +| ------------------- | -------------------------------------------------------------------------------------------------------------------- | | `.get_projects()` | Returns the latest projects along with their decription, like and commment count, image and member details. | | `.search(topic)` | Returns the searched projects along with their decription, like and commment count, image and member details. | -| `.get_hackathons()` | Returns the latest hackathons along with their title, participants, prizes, deadlines. | +| `.get_hackathons()` | Returns the latest hackathons along with their title, participants, prizes, deadlines. | | `.get_featured()` | Returns the latest featured projects along with their decription, like and commment count, image and member details. | | `.get_winner()` | Returns the latest winning projects along with their decription, like and commment count, image and member details. 
| @@ -1189,10 +1189,10 @@ yf = YahooFinance() # BBC News -First create an object of class `User` +First create an object of class `BBCNews` ```python -user = instagram.Users(username="nikhil25803") +bbc_scraper = BBCNews() ``` | Methods | Details | @@ -1471,7 +1471,7 @@ academia = Academia() | Method | Details | | ----------------------------- | --------------------------------------------------------------------- | -| `get_research_topics(letter)` | Fetches and returns research topics starting with the given letter. | +| `get_research_topics()` | Fetches and returns research topics. | | `get_research_papers(search)` | Fetches and returns research papers related to the given search term. | --- @@ -1569,7 +1569,6 @@ olympics = Olympics() | `.alldeceased()` | Returns the list of all recently deceased olympians along with their death date. | | `.alltimemedals()` | Returns list of all countries with their total numbers of medals yet in all categories. | - --- ## Codeforces @@ -1591,7 +1590,7 @@ Methods | `.get_user_data(username)` | Fetches user data from CodeForces. | ``` -------------------- +--- ## 88x31 Banners @@ -1604,15 +1603,67 @@ scraper = Scraper88x31() | Methods | Details | | ------------ | ----------------------------------------------- | | `.get_all()` | Returns the list of all available 88x31 banners | ------ + +--- ## Dictionary.com First create an object of class `Dictionary`. +| Methods | Details | +| ------------------------------- | ---------------------------------------------- | +| `.get_word_of_the_day()` | Returns the word of the day. | +| `.word_of_the_day_definition()` | Returns the definition of the word of the day. | -| Methods | Details | -| ---------------- | -------------------------------------------------------------------------------------------- | -| `.get_word_of_the_day()` | Returns the word of the day. | -| `.word_of_the_day_definition()` | Returns the definition of the word of the day. -------- + + +## AmbitionBox + +Create a directory named `ambitionBox`. +Add a Python file to it containing the code for scraping the website. + +```python +# Example usage +from scrape_up import ambitionBox + +num_pages_to_scrape = 2 + +scraper = ambitionBox.Companies(num_pages_to_scrape) + +scraper.scrape_companies() + +``` + +| Methods | Details | +| --------------- | ----------------------------------------------------------------------------- | +| `.scrape_companies()` | Returns the company names with their ratings. | + +--- + +## Geeksforgeeks + +First create an object of class `Geeksforgeeks`. +```python +geeksforgeeks = Geeksforgeeks(user="username") +``` + +| Methods | Details | +| ------------------------------- | ---------------------------------------------- | +| `.get_profile()` | Returns the user data in JSON format. | + +--- + +## Wuzzuf + +```python +from scrape_up import wuzzuf +jobs = wuzzuf.Jobs() +``` + +The `Jobs` class provides methods for configuring scraping parameters and fetching job listings: + +| Methods | Details | +| --------------- | ---------------------------------------------------------------------------------------- | +| `.filter_job()` | Apply filters such as job title, country, city, and range of years of experience. | +| `.fetch_jobs()` | Fetch job listings from the website based on the applied filters, across multiple pages. 
| diff --git a/documentation.md b/documentation.md index 4aa305ee..1dcf24b0 100644 --- a/documentation.md +++ b/documentation.md @@ -15,7 +15,7 @@ pip install scrape-up from scrape_up import github # Instantiate an object with the username provided. -user = github.Users(username="nikhil25803") +user = github.Users(username="Suhani2") # Call the followers function - it will return the number of followers per user.followers() @@ -56,7 +56,7 @@ from scrape_up import github Create an instance of the class `Users`. ```python -user = github.Users(username="nikhil25803") +user = github.Users(username="Suhani2") ``` | Methods | Details | @@ -88,7 +88,7 @@ user = github.Users(username="nikhil25803") Create an instance of the class `Repository`. ```python -repository = github.Repository(username="nikhil25803", repository_name="scrape-up") +repository = github.Repository(username="Suhani2", repository_name="scrape-up") ``` | Methods | Details | @@ -102,7 +102,7 @@ repository = github.Repository(username="nikhil25803", repository_name="scrape-u | `.releases()` | Returns the last ten releases of a repository. | | `.issues_count()` | Returns number of issues in a repository | | `.readme` | Saves the readme.md file of the given user to the current working directory. To view the readme.md with a live server, change ".md" to ".html" in "readme.md". | -| `.get_pull_requests_ids()` | Returns all ids of opened pull requests in a repository. | +| `.get_pull_requests_ids()` | Returns all IDs of opened pull requests in a repository. | | `.get_issues()` | Returns the list of all open issues in a repository. | | `.commits()` | Returns the number of commits in a repository. | | `.get_readme()` | Returns & saves README.md file of the special repository (if exists) | @@ -115,7 +115,7 @@ repository = github.Repository(username="nikhil25803", repository_name="scrape-u Create an instance of the class `Issue` ```python -repository = github.Issue(username="nikhil25803", repository_name="scrape-up", issue_number=59) +repository = github.Issue(username="Suhani2", repository_name="scrape-up", issue_number=883) ``` | Methods | Details | @@ -132,7 +132,7 @@ repository = github.Issue(username="nikhil25803", repository_name="scrape-up", i Create an instance of the class `PullRequest` ```python -pull_request = github.PullRequest(username="nikhil25803", repository_name="scrape-up", pull_request_number=30) +pull_request = github.PullRequest(username="Suhani2", repository_name="scrape-up", pull_request_number=30) ``` | Methods | Details | @@ -240,10 +240,10 @@ articles = hackernews.Articles() from scrape_up import hackerearth ``` -Create an object of class `Challanges` +Create an object of class `Challenges` ```python -hackerearth = hackerearth.Challanges() +hackerearth = hackerearth.Challenges() ``` | Methods | Details | @@ -271,7 +271,7 @@ hackerank = hackerrank.User() | Methods | Details | | ---------------------------- | ----------------------------------------------------------------------------------------- | | `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social etc. | -| `get_skills()` | Returns a list of verified skills and their links | +| `get_skills()` | Returns a list of verified skills and their links. 
| ### Scrape contest details @@ -283,8 +283,8 @@ hackerank = hackerrank.Contest() | Methods | Details | | --------------------- | ------------------------------------------------------------------- | -| `active_contests()` | Returns information on active contests like title, status, and link | -| `archived_contests()` | Returns information regarding archived contests | +| `active_contests()` | Returns information on active contests like title, status, and link.| +| `archived_contests()` | Returns information regarding archived contests. | --- @@ -300,12 +300,12 @@ Create an instance of `Hashnode` class. blogs = hashnode.Hashnode() ``` -| Methods | Details | -| ----------------- | ----------------------------------------------------------------------------------------------------- | -| `.get_feed()` | Returns the blogs with title, descriptions, author, read time, like and comment count, date and link | -| `.get_featured()` | Returns the featured blogs with title, descriptions, author, like and comment count, date and link | -| `.get_recent()` | Returns the recent blogs with title, descriptions, author, like and comment count, date and link | -| `.search(topic)` | Returns the blogs with title, descriptions, author, like and comment count, date and link for a topic | +| Methods | Details | +| ----------------- | ------------------------------------------------------------------------------------------------------ | +| `.get_feed()` | Returns the blogs with title, descriptions, author, read time, like and comment count, date, and link. | +| `.get_featured()` | Returns the featured blogs with title, descriptions, author, like and comment count, date, and link. | +| `.get_recent()` | Returns the recent blogs with title, descriptions, author, like and comment count, date, and link. | +| `.search(topic)` | Returns the blogs with title, descriptions, author, like and comment count, date, and link for a topic.| --- @@ -321,12 +321,12 @@ Create an instance of `ICC` class. scraper = icc.ICC() ``` -| Method | Details | -| ------------------------------------ | ------------------------------------------------------------------- | -| `.team_rankings(format)` | Returns the list of rankings of teams of the desired format | -| `.player_ranking(type,format)` | Returns the list of player ranking of desired type and format | -| `.team_rankings_women(format)` | Returns the list of rankings of teams of the desired format | -| `.player_ranking_women(type,format)` | Returns the list of women player ranking of desired type and format | +| Method | Details | +| ------------------------------------ | --------------------------------------------------------------------- | +| `.team_rankings(format)` | Returns the list of rankings of teams of the desired format. | +| `.player_ranking(type,format)` | Returns the list of player rankings of desired type and format. | +| `.team_rankings_women(format)` | Returns the list of rankings of teams of the desired format. | +| `.player_ranking_women(type,format)` | Returns the list of women player rankings of desired type and format. | --- @@ -344,7 +344,7 @@ academia = academia.Academia() | Method | Details | | ----------------------------- | --------------------------------------------------------------------- | -| `get_research_topics(letter)` | Fetches and returns research topics starting with the given letter. | +| `get_research_topics()` | Fetches and returns research topics. 
| | `get_research_papers(search)` | Fetches and returns research papers related to the given search term. | --- @@ -363,12 +363,12 @@ questions = askubuntu.Questions("topic") | Methods | Details | | --------------------------- | ---------------------------------------------------------------------------------------------------- | -| `.getNewQuestions()` | Returns the new questions, views, votes, answer counts, and descriptions in JSON format | -| `.getActiveQuestions()` | Returns the active questions, views, votes, answer counts, and descriptions in JSON format | -| `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format | -| `.getBountiedQuestions()` | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format | -| `.getFrequentQuestions()` | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format | -| `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format | +| `.getNewQuestions()` | Returns the new questions, views, votes, answer counts, and descriptions in JSON format. | +| `.getActiveQuestions()` | Returns the active questions, views, votes, answer counts, and descriptions in JSON format. | +| `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format. | +| `.getBountiedQuestions()` | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format. | +| `.getFrequentQuestions()` | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format. | +| `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format. | --- @@ -384,10 +384,10 @@ First create an object of class `BBCNews` user = bbcnews.BBCNews() ``` -| Methods | Details | -| ------------------ | -------------------------------------------------------- | -| `.get_headlines()` | Returns the list of object containig the headlines | -| `get_article()` | Returns an object with proper details about the articles | +| Methods | Details | +| ------------------ | --------------------------------------------------------- | +| `.get_headlines()` | Returns the list of objects containing the headlines. | +| `get_article()` | Returns an object with proper details about the articles. | --- @@ -423,11 +423,11 @@ response = covidinfo.CovidInfo() | Methods | Details | | -------------------- | --------------------------------------------------------------- | -| `.covid_data()` | Returns the list of all the covid data scraped from the website | -| `.total_cases()` | Returns the count of total covid cases all over the world | -| `.total_deaths()` | Returns the count of deaths covid cases all over the world | -| `.total_recovered()` | Returns the count of recovered covid cases all over the world | -| `.latest_news()` | Return the lastest news of the day | +| `.covid_data()` | Returns the list of all covid data scraped from the website. | +| `.total_cases()` | Returns the count of total covid cases all over the world. | +| `.total_deaths()` | Returns the count of covid death cases all over the world. | +| `.total_recovered()` | Returns the count of recovered covid cases all over the world. | +| `.latest_news()` | Returns the latest news of the day. | --- @@ -449,7 +449,7 @@ Create an instance of `Cricubzz` class. 
| `.get_recent_matches()` | Returns a list of recent matches from Cricbuzz. | | `.get_upcoming_matches()` | Returns a list of upcoming matches from Cricbuzz. | | `.get_series()` | Returns a dictionary of series in month and year format from Cricbuzz. | -| `.get_series_from_archive()` | Returns a list of series from archive from Cricbuzz. | +| `.get_series_from_archive()` | Returns a list of series from the archive from Cricbuzz. | | `.get_matches_by_day()` | Returns a dictionary of matches by day from Cricbuzz. | | `.get_series_matches()` | Returns a list of matches in a series from Cricbuzz. | | `.get_series_stats()` | Returns a list of stats of players in a series from Cricbuzz. | @@ -475,13 +475,13 @@ shots = dribbble.Dribbble() | Methods | Details | | --------------------- | ------------------------------------------------------------------------------------------------------------------------------- | -| `.get_shots()` | Returns the latest shots along with their title, designer and designer url like and view count and link. | -| `.search(topic)` | Returns the latest shots along with their title, designer and designer url like and view count and link for the searched topic. | -| `.get_animation()` | Returns the latest animation along with their title, designer and designer url like and view count and link. | -| `.get_branding()` | Returns the latest branding along with their title, designer and designer url like and view count and link. | -| `.get_illustration()` | Returns the latest illustration along with their title, designer and designer url like and view count and link. | -| `.get_mobile()` | Returns the latest mobile shots along with their title, designer and designer url like and view count and link. | -| `.get_webdesign()` | Returns the latest web-design shots along with their title, designer and designer url like and view count and link. | +| `.get_shots()` | Returns the latest shots along with their title, designer, designer URL, like and view count, and link. | +| `.search(topic)` | Returns the latest shots along with their title, designer, designer URL, like and view count, and link for the searched topic. | +| `.get_animation()` | Returns the latest animation along with their title, designer, designer URL, like and view count, and link. | +| `.get_branding()` | Returns the latest branding along with their title, designer, designer URL, like and view count, and link. | +| `.get_illustration()` | Returns the latest illustration along with their title, designer, designer URL, like and view count, and link. | +| `.get_mobile()` | Returns the latest mobile shots along with their title, designer, designer URL, like and view count, and link. | +| `.get_webdesign()` | Returns the latest web-design shots along with their title, designer, designer URL, like and view count, and link. | --- @@ -497,14 +497,14 @@ Create an instance of `EazyDiner` class. restaurants = eazydiner.EazyDiner(location="city-name") ``` -| Methods | Details | -| ------------------------- | --------------------------------------------------------------------------------- | -| `.get_restaurants()` | Returns the restaurants name, location, rating, cuisine and prices. | -| `.get_breakfast()` | Returns the restaurants name, location, rating, cuisine and prices for Breakfast. | -| `.get_lunch()` | Returns the restaurants name, location, rating, cuisine and prices for Lunch. | -| `.get_dinner()` | Returns the restaurants name, location, rating, cuisine and prices for Dinner. 
| -| `.dinner_with_discount()` | Returns a list of restaurants from the entered location with a 50% offer. | -| `.get_top10()` | Returns a list of the top 10 restaurants from a given city. | +| Methods | Details | +| ------------------------- | ----------------------------------------------------------------------------------- | +| `.get_restaurants()` | Returns each restaurant's name, location, rating, cuisine, and prices. | +| `.get_breakfast()` | Returns each restaurant's name, location, rating, cuisine, and prices for Breakfast. | +| `.get_lunch()` | Returns each restaurant's name, location, rating, cuisine, and prices for Lunch. | +| `.get_dinner()` | Returns each restaurant's name, location, rating, cuisine, and prices for Dinner. | +| `.dinner_with_discount()` | Returns a list of restaurants from the entered location with a 50% offer. | +| `.get_top10()` | Returns a list of the top 10 restaurants from a given city. | --- @@ -565,10 +565,10 @@ item = flipkart.Flipkart() | `.TVs()` | Returns the list of TV sets on flipkart | | `.bestseller_books()` | Returns the list of bestselling books data listed on Flipkart. | | `.mobiles()` | Returns the list of mobile phones under 50K along with their data. | -| `.sport_shoes()` | Returns the list of trendong sport shoes data. | -| `.laptops()` | Returns the list of laptop from flipkart. | -| `.camera()` | Returns the list of camera from flipkart. | -| `.computer()` | Returns the list of computer from flipkart. | +| `.sport_shoes()` | Returns the list of trending sport shoes data. | +| `.laptops()` | Returns the list of laptops from flipkart. | +| `.camera()` | Returns the list of cameras from flipkart. | +| `.computer()` | Returns the list of computers from flipkart. | | `.tablets()` | Returns the list of tablets from flipkart. | | `.cycle()` | Returns the list of bicycles from flipkart. | | `.printers()` | Returns the list of printers from flipkart. | @@ -590,13 +590,13 @@ cloth = flipkart.FlipkartClothing() | Methods | Details | | -------------------------- | -------------------------------------------------------------- | -| `.scrape()` | Returns the list of t-shirts with other relevant info | +| `.scrape()` | Returns the list of t-shirts with other relevant info. | | `.range()` | Returns the list of t-shirts between a particular price range. | -| `.minrating()` | Returns the list of t-shirts havinga minimum given rating. | +| `.minrating()` | Returns the list of t-shirts having a minimum given rating. | | `.gendermale()` | Returns the list of t-shirts which are for males. | -| `.genderfemale()` | Returns the list of t-shirts which are there for females. | -| `.size()` | Returns the list of tshirts havning a particular size. | -| `formal_shirts_for_male()` | It returns those t-shirts which are of a particular size | +| `.genderfemale()` | Returns the list of t-shirts that are for females. | +| `.size()` | Returns the list of t-shirts having a particular size. | +| `formal_shirts_for_male()` | It returns those t-shirts which are of a particular size. 
| --- @@ -610,7 +610,7 @@ item = flipkart.FlipkartLaptops() | Methods | Details | | ------------ | ---------------------------------------- | -| `.laptops()` | Returns the list of laptops with details | +| `.laptops()` | Returns the list of laptops with details. | --- @@ -628,8 +628,8 @@ flyrobu = flyrobu.Flyrobu() | Methods | Details | | ------------------------------------ | --------------------------------------------------------------------------------------------------------------- | -| `.search(keyword)` | Returns the json data of all the details related to search with informing about the total amount of items found | -| `.get_product_details(product_name)` | Returns the json data of the product details based on the given `product_name` | +| `.search(keyword)` | Returns the JSON data of all the details related to the search, along with the total number of items found. | +| `.get_product_details(product_name)` | Returns the JSON data of the product details based on the given `product_name`. | --- @@ -677,12 +677,12 @@ Create an instance of `Movie` class. movie = imdb.Movie(movie_name) ``` -| Methods | Details | -| ---------------- | -------------------------------------------------------- | -| `.rating()` | Returns the IMDB rating of the movie | -| `.description()` | Returns the description, cast and director of the movie | -| `.more_movies()` | Returns similar movies recommended by IMDB | -| `.box_office()` | Returns budget, gross worldwide collections of the movie | +| Methods | Details | +| ---------------- | --------------------------------------------------------- | +| `.rating()` | Returns the IMDB rating of the movie. | +| `.description()` | Returns the description, cast, and director of the movie. | +| `.more_movies()` | Returns similar movies recommended by IMDB. | +| `.box_office()` | Returns budget, gross worldwide collections of the movie. | #### IMDB - Actor Create an instance of `Actor` class. ```python actor = imdb.Actor(actor_name) ``` -| Methods | Details | -| ------------------- | ------------------------------------------------------- | -| `.popular_movies()` | Returns the popular movies in which the actor has acted | -| `.all_movies()` | Returns all movies acted in and upcoming movies | -| `.awards()` | Returns the number of awards and nominations | +| Methods | Details | +| ------------------- | -------------------------------------------------------- | +| `.popular_movies()` | Returns the popular movies in which the actor has acted. | +| `.all_movies()` | Returns all movies the actor has acted in, along with upcoming movies. | +| `.awards()` | Returns the number of awards and nominations. | #### IMDB - Celeb Create an instance of `Celeb` class. ```python celeb = imdb.Celeb() ``` -| Methods | Details | -| --------------- | -------------------------------------------------- | -| `.top_celebs()` | Returns the name, roles, famous movie of the celeb | +| Methods | Details | +| --------------- | ------------------------------------------------------ | +| `.top_celebs()` | Returns the name, roles, and famous movie of the celeb. | #### IMDB - Indian Movies Create an instance of `IndianMovies` class. ```python indianmovies = imdb.IndianMovies() ``` | Methods | Details | | ---------------------- | --------------------------------------------- | -| `.top_indian_movies()` | Returns the current list of top Indian movies | +| `.top_indian_movies()` | Returns the current list of top Indian movies. | #### IMDB - Box Office @@ -730,6 +730,7 @@ Create an instance of `BoxOffice` class. 
boxoffice = imdb.BoxOffice() ``` -| Methods | Details | -| --------------- | ----------------------------------------------------------------------------- | -| `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released | +| Methods | Details | +| --------------- | ------------------------------------------------------------------------------ | +| `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released. | + diff --git a/src/scrape_up/academia/academia.py b/src/scrape_up/academia/academia.py index bdd440aa..7227926a 100644 --- a/src/scrape_up/academia/academia.py +++ b/src/scrape_up/academia/academia.py @@ -22,7 +22,6 @@ def __init__(self): "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" } - def get_research_topics(self, topic="None"): + def get_research_topics(self): """ - Fetches and returns research topics starting with the given letter.\n - Param `letter`: The letter to filter research topics (default is "None" to get all topics).\n + Fetches and returns research topics.\n @@ -45,8 +45,7 @@ ``` """ try: - letter = topic.capitalize() - url = f"https://www.academia.edu/topics/{letter}" + url = "https://www.academia.edu/topics/" html_text = requests.get(url, headers=self.headers).text soup = BeautifulSoup(html_text, "lxml") diff --git a/src/scrape_up/amazon/__init__.py b/src/scrape_up/amazon/__init__.py new file mode 100644 index 00000000..f91de2c0 --- /dev/null +++ b/src/scrape_up/amazon/__init__.py @@ -0,0 +1,3 @@ +from .products import Product + +__all__ = ["Product"] diff --git a/src/scrape_up/amazon/products.py b/src/scrape_up/amazon/products.py index c6691406..15ce0d6e 100644 --- a/src/scrape_up/amazon/products.py +++ b/src/scrape_up/amazon/products.py @@ -2,82 +2,100 @@ from bs4 import BeautifulSoup -# scraping amazon product page class Product: + """Class for fetching and retrieving product details from Amazon.""" + def __init__(self, product_name: str): - self.product_name = product_name + """ + Initialize the Product object with a product name. - def get_product(self): + Args: + product_name (str): The name of the product. """ - Class - `Product`\n - Example -\n - ```python - product = Product(product_name="watch") - product.get_product() - ``` - Return\n - ```python - return - { - "data": product_link, - "message": f"Product data has been fetched", + self.product_name = product_name + self.headers = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36" } - ``` + + def fetch_product_page(self): + """ + Fetch the HTML content of the Amazon search page for the product. + + Returns: + BeautifulSoup: A BeautifulSoup object containing the parsed HTML content of the search page. + + Raises: + Exception: If there is an error fetching the page. 
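+ +        Example (a hypothetical usage sketch; results depend on Amazon's live markup and require network access): +            >>> product = Product("watch") +            >>> soup = product.fetch_product_page()  # a BeautifulSoup object of the search results page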
""" try: - product_name = self.product_name - product_name = product_name.replace(" ", "+") + product_name = self.product_name.replace(" ", "+") url = f"https://www.amazon.in/s?k={product_name}" - headers = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ - (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36" - } - r = requests.get(url, headers=headers) - soup = BeautifulSoup(r.content, "html.parser") - product = soup.find("div", {"class": "s-product-image-container"}) - product_link = product.find("a", {"class": "a-link-normal"})["href"] - product_link = "https://www.amazon.in" + product_link - return { - "data": product_link, - "message": f"Product data has been fetched", - } - except: - return { - "data": None, - "message": f"Unable to fetch product's data", - } + r = requests.get(url, headers=self.headers) + r.raise_for_status() # Raise HTTPError for bad responses + return BeautifulSoup(r.content, "html.parser") + except requests.RequestException as e: + raise Exception(f"Error fetching product page: {str(e)}") + + def get_product(self): + """ + Get the link to the product from the search page. + + Returns: + dict: A dictionary containing the product link and a message indicating success. + + Raises: + Exception: If there is an error fetching the product link. + """ + try: + soup = self.fetch_product_page() + product = soup.find("div", {"class": "s-result-item"}) + if product: + product_link = product.find("a", {"class": "a-link-normal"})["href"] + product_link = "https://www.amazon.in" + product_link + return { + "data": product_link, + "message": "Product data has been fetched", + } + else: + return { + "data": None, + "message": "Product not found", + } + except Exception as e: + raise Exception(f"Unable to fetch product's data: {str(e)}") - # Get product details def get_product_details(self): """ - Class - `Product`\n - Example -\n - ```python - product = Product(product_name="watch") - product.get_product_details() - ``` - Return\n - ```python - return - { - "data": product_details, - "message": f"Product detail has been fetched", - } - ``` + Get details of the product from the product page. + + Returns: + dict: A dictionary containing product details and a message indicating success. + + Raises: + Exception: If there is an error fetching the product details. 
""" try: product_link = self.get_product()["data"] - headers = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ - (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36" - } - r = requests.get(product_link, headers=headers) + r = requests.get(product_link, headers=self.headers) + r.raise_for_status() # Raise HTTPError for bad responses soup = BeautifulSoup(r.content, "html.parser") product_name = soup.find("span", {"id": "productTitle"}).text.strip() product_price = soup.find("span", {"class": "a-price-whole"}).text.strip() - product_rating = soup.find( - "span", {"class": "a-size-base a-color-base"} - ).text.strip() + product_rating = soup.find("span", {"class": "a-icon-alt"}).text.strip() product_details = { "product_name": product_name, "product_price": product_price, @@ -86,83 +90,53 @@ def get_product_details(self): } return { "data": product_details, - "message": f"Product detail has been fetched", - } - except: - return { - "data": None, - "message": f"Unable to fetch product detail", + "message": "Product detail has been fetched", } + except Exception as e: + raise Exception(f"Unable to fetch product detail: {str(e)}") - # Get product image def get_product_image(self): """ - Class - `Product`\n - Example -\n - ```python - product = Product(product_name="watch") - product.get_product_image() - ``` - Return\n - ```python - return - { - "data": product_image, - "message": f"Product image has been fetched", - } - ``` + Get the URL of the product image from the product page. + + Returns: + dict: A dictionary containing the product image URL and a message indicating success. + + Raises: + Exception: If there is an error fetching the product image. """ try: product_link = self.get_product()["data"] - headers = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ - (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36" - } - r = requests.get(product_link, headers=headers) + r = requests.get(product_link, headers=self.headers) + r.raise_for_status() # Raise HTTPError for bad responses soup = BeautifulSoup(r.content, "html.parser") - product_image = soup.find( - "img", {"class": "a-dynamic-image a-stretch-horizontal"} - )["src"] - + product_image = soup.find("div", {"id": "imgTagWrapperId"}).find("img")[ + "data-old-hires" + ] return { "data": product_image, - "message": f"Product image has been fetched", - } - except: - return { - "data": None, - "message": f"Unable to fetch product image", + "message": "Product image has been fetched", } + except Exception as e: + raise Exception(f"Unable to fetch product image: {str(e)}") - # Get customer reviews def customer_review(self): """ - Class - `Product`\n - Example -\n - ```python - product = Product(product_name="watch") - product.customer_review() - ``` - Return\n - ```python - return - { - "data": review, - "message": f"Product review has been fetched", - } - ``` + Get customer reviews of the product from the product page. + + Returns: + dict: A dictionary containing the product reviews and a message indicating success. + + Raises: + Exception: If there is an error fetching the product reviews. 
""" try: product_link = self.get_product()["data"] - headers = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ - (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36" - } - r = requests.get(product_link, headers=headers) + r = requests.get(product_link, headers=self.headers) + r.raise_for_status() # Raise HTTPError for bad responses soup = BeautifulSoup(r.content, "html.parser") - review_elements = soup.find_all("div", {"data-hook": "review"}) - + reviews = [] for review_element in review_elements: reviewer_name = review_element.find( "span", {"class": "a-profile-name"} @@ -181,13 +155,17 @@ def customer_review(self): review_text = review_element.find( "span", {"data-hook": "review-body"} ).text.strip() - review = [reviewer_name, rating, review_title, review_date, review_text] - return { - "data": review, - "message": f"Product review has been fetched", - } - except: + review = { + "reviewer_name": reviewer_name, + "rating": rating, + "review_title": review_title, + "review_date": review_date, + "review_text": review_text, + } + reviews.append(review) return { - "data": None, - "message": f"Unable to fetch product review", + "data": reviews, + "message": "Product reviews have been fetched", } + except Exception as e: + raise Exception(f"Unable to fetch product reviews: {str(e)}") diff --git a/src/scrape_up/ambitionBox/company.py b/src/scrape_up/ambitionBox/company.py new file mode 100644 index 00000000..49f30251 --- /dev/null +++ b/src/scrape_up/ambitionBox/company.py @@ -0,0 +1,69 @@ +import requests +from bs4 import BeautifulSoup + + +class Comapiens: + def __init__(self,num_pages: int=1): + self.num_pages = num_pages + + def write_sorted_list(self, file, company_list): + + company_list.sort(key=lambda x: x[1], reverse=True) + for company_name, rating in company_list: + file.write(f"{company_name.strip()} {rating}\n") + + def scrape_companies(self): + + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36" + } + + for page in range(1, self.num_pages + 1): + print(f"Scraping webpage number: {page} of {self.num_pages}") + + url = f"https://www.ambitionbox.com/list-of-companies?page={page}" + response = requests.get(url, headers=headers) + + if response.status_code == 200: + soup = BeautifulSoup(response.text, 'lxml') + + companies = soup.find_all('div', class_="companyCardWrapper") + + company_ratings = [] + + for company in companies: + company_name = company.find('h2', class_="companyCardWrapper__companyName").text.strip() + company_star = company.find('span', class_="companyCardWrapper__companyRatingValue") + + if company_name and company_star: + try: + rating = float(company_star.text) + company_ratings.append((company_name, rating)) + except ValueError: + print(f"Error parsing rating for company: {company_name}") + + with open("src/scrape_up/ambitionBox/company_ratings.txt", "a") as f: + f.write(f"\nPAGE: {url}\n") + f.write("COMPANY UNDER 5 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 4 < r[1] <= 5]) + + f.write("\nCOMPANY UNDER 4 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 3 < r[1] <= 4]) + + # Corrected indentation for following lines + f.write("\nCOMPANY UNDER 3 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 2 < r[1] <= 3]) + + f.write("\nCOMPANY UNDER 2 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 1 < r[1] <= 2]) + + f.write("\nCOMPANY UNDER 1 STAR\n") + 
self.write_sorted_list(f, [r for r in company_ratings if 0 < r[1] <= 1]) + else: + print(f"Error scraping page {page}: {response.status_code}") + + +if __name__ == "__main__": + c = Companies(10) + c.scrape_companies() diff --git a/src/scrape_up/ambitionBox/company_ratings.txt b/src/scrape_up/ambitionBox/company_ratings.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/scrape_up/coursera/__init__.py b/src/scrape_up/coursera/__init__.py new file mode 100644 index 00000000..57fb5840 --- /dev/null +++ b/src/scrape_up/coursera/__init__.py @@ -0,0 +1,3 @@ +from .courses import Coursera + +__all__ = ["Coursera"] diff --git a/src/scrape_up/coursera/courses.py b/src/scrape_up/coursera/courses.py index 9eeb77f8..8c404c83 100644 --- a/src/scrape_up/coursera/courses.py +++ b/src/scrape_up/coursera/courses.py @@ -11,7 +11,7 @@ class Coursera: ``` | Methods | Details | | ------------------------------------- | ------------------------------------------------------------------------------------------ | - | `.get_courses()` | Returns the courses with title, teached by, skills, rating, review count, img url and link | + | `.get_courses()` | Returns the courses with title, taught by, skills, rating, review count, img url and link | | `.fetchModules(course='Course Name')` | Returns the modules associated with the Coursera. | """ @@ -31,7 +31,7 @@ def get_courses(self): [ { "title": Title of the course - "teached_by": Organization which teaches the course + "taught_by": Organization which teaches the course "skills": Skills learnt from the course "rating": Rating of the course "review_count": Np. of review of the course @@ -47,34 +47,36 @@ res = requests.get(url) soup = BeautifulSoup(res.text, "html.parser") - courses_data = {"courses": []} - - courses = soup.find_all("div", class_="css-1cj5od") + courses_data = [] + courses = soup.find_all("div", class_="css-1evtm7z") for c in courses: try: - title = c.find("h2", class_="cds-119 css-h1jogs cds-121").getText() - teached_by = c.find( - "span", class_="cds-119 css-1mru19s cds-121" - ).getText() - skills = c.find("p", class_="cds-119 css-12ksubz cds-121").getText() - rating = c.find("p", class_="cds-119 css-11uuo4b cds-121").getText() + title = c.find("a").getText() + taught_by = c.find( + "p", class_="cds-ProductCard-partnerNames css-vac8rf" + ).getText() + skills = c.find( + "div", class_="cds-CommonCard-bodyContent" + ).p.getText()[20:] + review_div = c.find("div", class_="product-reviews css-pn23ng") + rating = review_div.find("p", class_="css-2xargn").getText() review_count = ( - c.find("p", class_="cds-119 css-dmxkm1 cds-121") + review_div.find("p", class_="css-vac8rf") .getText() .replace("(", "") .replace(")", "") ) - img = c.find("div", class_="css-1doy6bd") + img = c.find("div", class_="cds-CommonCard-previewImage") img_url = img.find("img")["src"] link = "https://www.coursera.org" + c.find("a")["href"] except: pass - courses_data["courses"].append( + courses_data.append( { "title": title, - "teached_by": teached_by, + "taught_by": taught_by, "skills": skills, "rating": rating, "review_count": review_count, @@ -82,7 +84,7 @@ "img_url": img_url, "link": link, } ) - return courses_data["courses"] + return courses_data except: return None @@ -92,15 +94,30 @@ def fetch_modules(self, course): """ Example: ``` courses = Coursera(topic="ml") - courses.fetch_modules() + courses.fetch_modules(course="Machine Learning with Python") ``` + Note: Some courses have specializations instead of modules; this method handles both. 
+ Returns: + For modules: + ```js + { + "Module 1": Name of the first module + } + ``` + + For specializations: ```js - [ modules ] + { + "Specialization 1": { + Title: Name of the specialization + Link: Link to the specialization page + } + } ``` """ + courseList = self.get_courses() - global ccourseURL for i in courseList: if i["title"] == course: courseURL = i["link"] @@ -115,22 +132,28 @@ "data" ]["product"] - modules = soup.find_all("div", class_="SyllabusModule") - modules_data = [] - for m in modules: - mod = m.find("h3", class_="headline-2-text bold m-b-2").getText() - modules_data.append(mod) - - if modules_data == []: - modules = soup.find_all("div", class_="css-13tws8d") - for m in modules: - mod = m.find( - "a", class_="cds-119 cds-113 cds-115 css-1uw69sh cds-142" - ).getText() - modules_data.append(mod) + section_type = "Module" + module_section = soup.find("div", id="modules") + if module_section is None: + module_section = soup.find("div", id="courses") + section_type = "Specialization" + modules = module_section.find_all( + "div", attrs={"data-testid": "accordion-item"} + ) + modules_data = {} + for index, m in enumerate(modules): + if section_type == "Module": + # For modules + mod = m.find("h3").getText() + else: + # For specializations + mod = {} + mod["Title"] = m.find("h3").getText() + mod["Link"] = "https://www.coursera.org" + m.find("a")["href"] + modules_data[f"{section_type} {index+1}"] = mod return modules_data else: - return "Server Error. Retry" + return None except: - return "No modules for this course" + return None diff --git a/src/scrape_up/geeksforgeeks/__init__.py b/src/scrape_up/geeksforgeeks/__init__.py new file mode 100644 index 00000000..741b1929 --- /dev/null +++ b/src/scrape_up/geeksforgeeks/__init__.py @@ -0,0 +1,3 @@ +from .geeksforgeeks import Geeksforgeeks + +__all__ = ["Geeksforgeeks"] diff --git a/src/scrape_up/geeksforgeeks/geeksforgeeks.py b/src/scrape_up/geeksforgeeks/geeksforgeeks.py new file mode 100644 index 00000000..68ba5272 --- /dev/null +++ b/src/scrape_up/geeksforgeeks/geeksforgeeks.py @@ -0,0 +1,94 @@ +import requests +from bs4 import BeautifulSoup + + +class Geeksforgeeks: + """ + Create an instance of the class `Geeksforgeeks` + ```py + gfg = Geeksforgeeks(user="nikhil25803") + gfg.get_profile() + ``` + + | Methods | Details | + | ----------------- | ---------------------------------------------------------------------------------- | + | `.get_profile()` | Returns the user data in JSON format. 
| + + + Response: + ```js + { + "username": "22cs3iehq", + "college_name": "Rajiv Gandhi Institute of Petroleum Technology (RGIPT) Rae Bareli", + "college_rank": "1", + "score": { + "overall_coding_score": "6085", + "monthly_coding_score": "14" + }, + "languages_used": "C++, Javascript, Python, Java, C", + "current_potd_streak": "407/1015", + "total_problem_solved": "1534", + "campus_ambassador": "22cs3iehq" + } + ``` + """ + + def __init__(self, user): + self.user = user + + def get_profile(self): + try: + url = f"https://www.geeksforgeeks.org/user/{self.user}/" + headers = {"User-Agent": "scrapeup"} + response = requests.get(url, headers=headers) + soup = BeautifulSoup(response.text, "html.parser") + main_info = soup.find("div", class_="AuthLayout_head_content__ql3r2") + + username = main_info.find( + "div", + class_="profilePicSection_head_userHandleAndFollowBtnContainer_userHandle__p7sDO", + ).text + college_rank = main_info.find( + "span", class_="profilePicSection_head_userRankContainer_rank__abngM" + ).text + college = main_info.find( + "div", class_="educationDetails_head_left--text__tgi9I" + ).text + languages = main_info.find( + "div", class_="educationDetails_head_right--text__lLOHI" + ).text + campus_ambassador = soup.find( + "a", class_="basicUserDetails_head_CA--text__IoHEU" + ).text + current_potd_streak = main_info.find( + "div", class_="circularProgressBar_head_mid_streakCnt__MFOF1 tooltipped" + ).text + score = main_info.find_all( + "div", class_="scoreCard_head_card_left--score__pC6ZA" + ) + overall_coding_score = score[0].text + total_problem_solved = score[1].text + monthly_coding_score = score[2].text + + user_data = { + "username": username, + "college_name": college, + "college_rank": college_rank, + "score": { + "overall_coding_score": overall_coding_score, + "monthly_coding_score": monthly_coding_score, + }, + "languages_used": languages, + "current_potd_streak": current_potd_streak, + "total_problem_solved": total_problem_solved, + "campus_ambassador": campus_ambassador, + } + + return user_data + except: + return None + + +if __name__ == "__main__": + gfg = Geeksforgeeks(user="nikhil25803") + print(gfg.get_profile()) diff --git a/src/scrape_up/leetcode/leetcode_scraper.py b/src/scrape_up/leetcode/leetcode_scraper.py index e25e088d..b29cbc60 100644 --- a/src/scrape_up/leetcode/leetcode_scraper.py +++ b/src/scrape_up/leetcode/leetcode_scraper.py @@ -145,9 +145,7 @@ def get_problems_solved(self): try: total_problems = soup.find( "div", - { - "class": "text-[24px] font-medium text-label-1 dark:text-dark-label-1" - }, + {"class": "text-[30px] font-semibold leading-[32px]"}, ) return { "data": total_problems.text, diff --git a/src/scrape_up/wuzzuf/__init__.py b/src/scrape_up/wuzzuf/__init__.py new file mode 100644 index 00000000..1e78fe07 --- /dev/null +++ b/src/scrape_up/wuzzuf/__init__.py @@ -0,0 +1,3 @@ +from .wuzzuf import Jobs + +__all__ = ["Jobs"] diff --git a/src/scrape_up/wuzzuf/wuzzuf.py b/src/scrape_up/wuzzuf/wuzzuf.py new file mode 100644 index 00000000..3d42efc3 --- /dev/null +++ b/src/scrape_up/wuzzuf/wuzzuf.py @@ -0,0 +1,135 @@ +import requests +from bs4 import BeautifulSoup +from time import sleep + + +class Jobs: + """ + Create an instance of the class `Jobs` + ```python + scraper = Jobs() + ``` + | Methods | Details | + | ----------------------------- | -------------------------------------------------------------------------------------------------- | + | `.filter_job()` | Apply filters to the job search using parameters like title, country, city, minimum and 
diff --git a/src/scrape_up/leetcode/leetcode_scraper.py b/src/scrape_up/leetcode/leetcode_scraper.py
index e25e088d..b29cbc60 100644
--- a/src/scrape_up/leetcode/leetcode_scraper.py
+++ b/src/scrape_up/leetcode/leetcode_scraper.py
@@ -145,9 +145,7 @@ def get_problems_solved(self):
         try:
             total_problems = soup.find(
                 "div",
-                {
-                    "class": "text-[24px] font-medium text-label-1 dark:text-dark-label-1"
-                },
+                {"class": "text-[30px] font-semibold leading-[32px]"},
             )
             return {
                 "data": total_problems.text,
diff --git a/src/scrape_up/wuzzuf/__init__.py b/src/scrape_up/wuzzuf/__init__.py
new file mode 100644
index 00000000..1e78fe07
--- /dev/null
+++ b/src/scrape_up/wuzzuf/__init__.py
@@ -0,0 +1,3 @@
+from .wuzzuf import Jobs
+
+__all__ = ["Jobs"]
diff --git a/src/scrape_up/wuzzuf/wuzzuf.py b/src/scrape_up/wuzzuf/wuzzuf.py
new file mode 100644
index 00000000..3d42efc3
--- /dev/null
+++ b/src/scrape_up/wuzzuf/wuzzuf.py
@@ -0,0 +1,131 @@
+import requests
+from bs4 import BeautifulSoup
+from time import sleep
+
+
+class Jobs:
+    """
+    Create an instance of the class `Jobs`
+    ```python
+    scraper = Jobs()
+    ```
+    | Methods         | Details                                                                                                                   |
+    | --------------- | ------------------------------------------------------------------------------------------------------------------------- |
+    | `.filter_job()` | Apply filters to the job search using parameters like title, country, city, and minimum and maximum years of experience. |
+    | `.fetch_jobs()` | Fetch job listings based on the applied filters, with an optional maximum number of pages to scrape.                      |
+    """
+
+    def __init__(self):
+        self.url = "https://wuzzuf.net/search/jobs/?"
+
+    def filter_job(
+        self,
+        title=None,
+        country=None,
+        city=None,
+        min_years_of_experience=None,
+        max_years_of_experience=None,
+    ):
+        """
+        Apply filters to the job search.
+
+        Parameters:
+        - `title` (str): Job title to search for.
+        - `country` (str): Country to search for jobs in.
+        - `city` (str): City to search for jobs in.
+        - `min_years_of_experience` (int): Minimum years of experience required.
+        - `max_years_of_experience` (int): Maximum years of experience allowed.
+
+        Example:
+        ```python
+        scraper.filter_job(title="software engineer", country="Egypt", city="Cairo", min_years_of_experience=2, max_years_of_experience=5)
+        ```
+        """
+        if title:
+            title = title.replace(" ", "+")
+            self.url += f"q={title}"
+        if country:
+            self.url += f"&filters[country][0]={country.strip().capitalize()}"
+        if city:
+            self.url += f"&filters[city][0]={city.strip().capitalize()}"
+        if min_years_of_experience:
+            self.url += (
+                f"&filters[years_of_experience_min][0]={min_years_of_experience}"
+            )
+        if max_years_of_experience:
+            self.url += (
+                f"&filters[years_of_experience_max][0]={max_years_of_experience}"
+            )
+
+    def __fetch_page_jobs(self, page_num):
+        response = requests.get(self.url + f"&start={page_num}")
+        if response.status_code == 200:
+            parsed_html = BeautifulSoup(response.content, "lxml")
+            jobs_data = parsed_html.find_all("div", {"class": "css-1gatmva e1v1l3u10"})
+            job_sub_list = []
+            for job_data in jobs_data:
+                job = {
+                    "name": self.__get_job_name(job_data),
+                    "url": self.__get_job_url(job_data),
+                    "company": self.__get_job_company(job_data),
+                    "location": self.__get_job_location(job_data),
+                    "published_time": self.__get_published_time(job_data),
+                    "properties": self.__get_job_properties(job_data),
+                }
+                job_sub_list.append(job)
+            return job_sub_list
+        else:
+            # A failed page fetch yields None rather than raising
+            return None
+
+    def fetch_jobs(self, max_page_number=50):
+        """
+        Fetch job listings based on the applied filters.
+
+        Parameters:
+        - `max_page_number` (int): Maximum number of pages to scrape (default is 50).
+
+        Returns:
+        - `list`: A list of dictionaries representing the fetched job listings.
+
+        Example:
+        ```python
+        jobs = scraper.fetch_jobs(max_page_number=5)
+        ```
+        """
+        job_list = []
+        try:
+            for page_num in range(max_page_number):
+                job_sub_list = self.__fetch_page_jobs(page_num)
+                if job_sub_list:
+                    job_list.extend(job_sub_list)
+                else:
+                    break
+                sleep(1)
+        except requests.RequestException:
+            return None
+        return job_list
+
+    def __get_job_name(self, job_data):
+        return job_data.find("h2", {"class": "css-m604qf"}).find("a").text.strip()
+
+    def __get_job_url(self, job_data):
+        return job_data.find("h2", {"class": "css-m604qf"}).find("a")["href"]
+
+    def __get_job_company(self, job_data):
+        return job_data.find("div", {"class": "css-d7j1kk"}).find("a").text[:-1].strip()
+
+    def __get_job_location(self, job_data):
+        data = job_data.find("span", {"class": "css-5wys0k"})
+        return data.text.strip() if data else None
+
+    def __get_published_time(self, job_data):
+        return (
+            job_data.find("div", {"class": "css-4c4ojb"})
+            or job_data.find("div", {"class": "css-do6t5g"})
+        ).text.strip()
+
+    def __get_job_properties(self, job_data):
+        job_properties_string = " ,".join(
+            [prop.text for prop in job_data.find_all("span", {"class": "eoyjyou0"})]
+        )
+        return job_properties_string if job_properties_string else None
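A short end-to-end sketch of the new `Jobs` API; the filter values are illustrative, and results depend on the live wuzzuf.net listing markup:

```python
from scrape_up.wuzzuf import Jobs

scraper = Jobs()
# All filter parameters are optional; they are appended to the query string.
scraper.filter_job(title="software engineer", country="Egypt", city="Cairo")

jobs = scraper.fetch_jobs(max_page_number=2)
if jobs:  # None or an empty list signals that nothing could be fetched
    for job in jobs[:3]:
        print(job["name"], "-", job["company"], "-", job["location"])
```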
diff --git a/src/test/academia_test.py b/src/test/academia_test.py
index 54d6a8a3..e00e302a 100644
--- a/src/test/academia_test.py
+++ b/src/test/academia_test.py
@@ -8,7 +8,7 @@ def setUp(self):
 
     def test_get_research_topics(self):
         academia = Academia()
-        result = academia.get_research_topics(topic="Machine Learning")
+        result = academia.get_research_topics()
         self.assertIsNotNone(result)
         self.assertIsInstance(result, list)
diff --git a/src/test/amazon_test.py b/src/test/amazon_test.py
new file mode 100644
index 00000000..e707a623
--- /dev/null
+++ b/src/test/amazon_test.py
@@ -0,0 +1,63 @@
+import unittest
+from scrape_up.amazon import Product
+
+
+class AmazonTest(unittest.TestCase):
+    def setUp(self):
+        self.product = Product("Watch")
+
+    def _assert_valid_response(self, result):
+        # Shared checks: each Product method returns a dict with
+        # "data" and "message" keys, or None on failure.
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, dict)
+        if result is not None:
+            self.assertIn("data", result)
+            self.assertIn("message", result)
+            if result["data"] is not None:
+                self.assertIsInstance(result["data"], str)
+            if result["message"] is not None:
+                self.assertIsInstance(result["message"], str)
+
+    def test_get_product(self):
+        self._assert_valid_response(self.product.get_product())
+
+    def test_get_product_details(self):
+        self._assert_valid_response(self.product.get_product_details())
+
+    def test_get_product_image(self):
+        self._assert_valid_response(self.product.get_product_image())
+
+    def test_customer_review(self):
+        self._assert_valid_response(self.product.customer_review())
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/test/bbc_test.py b/src/test/bbc_test.py
new file mode 100644
index 00000000..9075c4e7
--- /dev/null
+++ b/src/test/bbc_test.py
@@ -0,0 +1,73 @@
+import unittest
+from scrape_up.bbcnews import BBCNews
+
+
+class TestBBCNews(unittest.TestCase):
+    """
+    | Methods            | Details                                                   |
+    | ------------------ | --------------------------------------------------------- |
+    | `.get_headlines()` | Returns the list of objects containing the headlines      |
+    | `.get_article()`   | Returns an object with proper details about the articles  |
+    """
+
+    def setUp(self):
+        """
+        Initialize a BBCNews instance before each test method.
+        """
+        self.bbc_scraper = BBCNews()
+
+    def test_get_headlines(self):
+        """
+        Test the get_headlines() method.
+        """
+        headlines = self.bbc_scraper.get_headlines()
+
+        # get_headlines() returns None when the request fails, so only
+        # validate the structure when data was actually fetched.
+        if headlines is not None:
+            # Check that headlines is a list of dictionaries
+            self.assertIsInstance(headlines, list)
+            for headline in headlines:
+                self.assertIsInstance(headline, dict)
+                self.assertIn("index", headline)
+                self.assertIn("headline", headline)
+
+            # Check that all headlines have unique indices
+            indices = {headline["index"] for headline in headlines}
+            self.assertEqual(
+                len(indices), len(headlines), "Duplicate indices found in headlines"
+            )
+            # Check that the headlines list is not empty
+            self.assertGreater(len(headlines), 0, "No headlines extracted")
+
+    def test_get_article(self):
+        """
+        Test the get_article(url) method.
+        """
+        # Test with a valid article URL
+        valid_url = "https://www.bbc.co.uk/news/world-europe-61258011"
+        article = self.bbc_scraper.get_article(valid_url)
+
+        if article is not None:
+            # Check that the article is a dictionary with the expected keys
+            self.assertIsInstance(article, dict)
+            self.assertIn("main_heading", article)
+            self.assertIn("time", article)
+            self.assertIn("text", article)
+
+        # Test with an invalid article URL; should return None
+        invalid_url = "https://www.bbc.co.uk/news/non-existent-article"
+        invalid_article = self.bbc_scraper.get_article(invalid_url)
+        self.assertIsNone(invalid_article, "Invalid URL should return None")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/test/coursera_test.py b/src/test/coursera_test.py
new file mode 100644
index 00000000..edc429eb
--- /dev/null
+++ b/src/test/coursera_test.py
@@ -0,0 +1,56 @@
+import unittest
+from scrape_up.coursera import Coursera
+
+
+class TestCoursera(unittest.TestCase):
+    def setUp(self):
+        self.scraper = Coursera("Machine Learning")
+
+    def test_get_courses(self):
+        result = self.scraper.get_courses()
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, list)
+
+        if result is not None:
+            for topic in result:
+                self.assertIn("title", topic)
+                self.assertIn("taught_by", topic)
+                self.assertIn("skills", topic)
+                self.assertIn("rating", topic)
+                self.assertIn("review_count", topic)
+                self.assertIn("img_url", topic)
+                self.assertIn("link", topic)
+
+                self.assertIsInstance(topic["title"], str)
+                self.assertIsInstance(topic["taught_by"], str)
+                self.assertIsInstance(topic["skills"], str)
+                self.assertIsInstance(topic["rating"], str)
+                self.assertIsInstance(topic["review_count"], str)
+                self.assertIsInstance(topic["img_url"], str)
+                self.assertIsInstance(topic["link"], str)
+
+    def test_fetch_modules_with_modules(self):
+        result = self.scraper.fetch_modules(course="Machine Learning with Python")
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, dict)
+
+        if result is not None:
+            for value in result.values():
+                self.assertIsInstance(value, str)
+
+    def test_fetch_modules_with_specializations(self):
+        result = self.scraper.fetch_modules(course="Machine Learning")
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, dict)
+
+        if result is not None:
+            for value in result.values():
+                self.assertIsInstance(value, dict)
+                self.assertIn("Title", value)
+                self.assertIn("Link", value)
+                self.assertIsInstance(value["Title"], str)
+                self.assertIsInstance(value["Link"], str)
+
+
+if __name__ == "__main__":
+    unittest.main()