Skip to content

Commit

Permalink
web scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
yozaam committed Nov 8, 2020
1 parent 3aaff8a commit 49500c3
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 10 deletions.
19 changes: 19 additions & 0 deletions day15_web_scraping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import requests
import re
import pandas as pd


# Browser-like User-Agent sent with the request.
# NOTE(review): presumably the site rejects the default client UA — confirm.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'}

RIDDLES_URL = "https://parade.com/947956/parade/riddles/"

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# re.S lets '.' span newlines, because a riddle or answer may wrap across lines.
RIDDLE_RE = re.compile(r'Riddle:</b>(.*?)<br><b>', re.S)
ANSWER_RE = re.compile(r'<b>Answer:</b>(.*?)</p>', re.S)


def extract_riddles(html):
    """Pull the riddle and answer texts out of the page markup.

    Args:
        html: Page source as a string.

    Returns:
        A dict with two equally long lists under the keys ``'riddle'`` and
        ``'answer'``, in document order. The captured text is returned
        exactly as matched (no stripping).

    Raises:
        ValueError: If the number of riddles and answers differ, since the
            columns could then no longer be paired row by row.
    """
    riddles = RIDDLE_RE.findall(html)
    answers = ANSWER_RE.findall(html)
    if len(riddles) != len(answers):
        raise ValueError(
            f"found {len(riddles)} riddles but {len(answers)} answers; "
            "the page markup does not match the expected layout"
        )
    return {'riddle': riddles, 'answer': answers}


def main():
    """Download the riddles page, print its source, then print the table."""
    response = requests.get(RIDDLES_URL, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    html = response.text

    # Kept from the original script: dumps the raw page source to stdout.
    print(html)

    df = pd.DataFrame(extract_riddles(html))
    print(df)


if __name__ == "__main__":
    main()
24 changes: 14 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# Required concepts: https://www.w3schools.com/python/python_intro.asp OOPS & list comprehension
import requests
import re
import pandas as pd

print('Question: Make a list of objects with different names')

class Bird:
    """A bird that carries nothing but its name."""

    def __init__(self, name):
        # this constructor creates a Bird with name
        self.name = name
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'}

all_names = ['parrot', 'ostrich']
r = requests.get(f"https://parade.com/947956/parade/riddles/", headers = headers).text

all_birds = [Bird(name) for name in all_names]
print(r)

for bird in all_birds:
print(bird.name)
riddle = re.findall('Riddle:</b>(.*?)<br><b>' , r, re.S)
answer = re.findall('<b>Answer:</b>(.*?)</p>' , r, re.S)

dic = {'riddle':riddle,'answer':answer}

df = pd.DataFrame(dic)

print(df)
130 changes: 130 additions & 0 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Poetry project metadata.
[tool.poetry]
name = "repl_python3_placementTraining"
version = "0.1.0"
description = ""
# NOTE(review): the address below looks like an email-obfuscation placeholder
# rather than a real value — confirm against the actual file.
authors = ["repl.it user <[email protected]>"]

# Runtime dependencies.
# NOTE(review): the scripts changed in this commit also import `requests`,
# which is not declared here — confirm whether it should be added.
[tool.poetry.dependencies]
python = "^3.8"
pandas = "^1.1.4"

# No development-only dependencies are declared.
[tool.poetry.dev-dependencies]

# Build backend used when the project is packaged.
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"

0 comments on commit 49500c3

Please sign in to comment.