-
Notifications
You must be signed in to change notification settings - Fork 70
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #30 from isumit19/master
Merged
- Loading branch information
Showing
30 changed files
with
385 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Content Aggregator | ||
GDTC Hack_in Project | ||
|
||
### Description | ||
The project scrapes content from three websites (AndroidPolice.com, Toptal.com, and Careers360.com): | ||
<ul> | ||
<li><a href="https://www.androidpolice.com/">AndroidPolice</a> - Top Tech News</li> | ||
<li><a href="https://www.toptal.com/developers/blog">Toptal</a> - Developers Blog Headlines</li> | ||
<li><a href="https://engineering.careers360.com/colleges/ranking">Careers360</a> - Top Engineering Colleges</li> | ||
</ul> | ||
|
||
## Screenshots | ||
|
||
<img src="Screenshots/1.jpg"> | ||
<img src="Screenshots/7.jpg"> | ||
<img src="Screenshots/2.jpg"> | ||
<img src="Screenshots/8.jpg"> | ||
<img src="Screenshots/3.jpg"> |
26 changes: 26 additions & 0 deletions
26
P13_ContentAggregator/ContentAggregator/ScapeWeb/AndroidPolice.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import requests | ||
import sqlite3 as sq | ||
from bs4 import BeautifulSoup as bs | ||
|
||
|
||
def scrape(db):
    """Scrape AndroidPolice headlines into the ``content_agg`` table.

    Args:
        db: Object exposing ``connect()``, which returns a sqlite3
            connection or ``None``.

    Returns:
        True when the page was fetched and the inserts were committed;
        False when the HTTP request or the database connection failed.
    """
    url = "https://www.androidpolice.com/"
    try:
        # Without a timeout a stalled server would block the caller forever.
        page = requests.get(url, timeout=30).content
    except requests.RequestException as e:
        print("Error : ", e)
        return False
    soup = bs(page, "html.parser")

    con = db.connect()
    if con is None:
        return False
    try:
        cursor = con.cursor()
        # Walk the page bottom-up so the first header on the page is the
        # last row inserted.
        for header in reversed(soup.find_all('header', class_="post-header")):
            try:
                # The second anchor inside the header carries title and link.
                link = header.find_all('a')[1]
                cursor.execute(
                    "Insert into content_agg(source, title, url) values('AndroidPolice',?,?)",
                    (link.text.strip(), link['href'].strip()),
                )
            except sq.IntegrityError:
                # Row already stored (primary-key clash); skip it.
                pass
            except Exception as e:
                print("Error : ", e)
        con.commit()
    finally:
        con.close()
    return True
28 changes: 28 additions & 0 deletions
28
P13_ContentAggregator/ContentAggregator/ScapeWeb/TopTal.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import requests | ||
import sqlite3 as sq | ||
from bs4 import BeautifulSoup as bs | ||
|
||
|
||
def scrape(db):
    """Scrape the first three Toptal developer-blog pages into ``content_agg``.

    A single database connection is used for all pages, and the function
    returns only after every page has been processed.

    Args:
        db: Object exposing ``connect()``, which returns a sqlite3
            connection or ``None``.

    Returns:
        True when all pages were fetched and committed; False when the
        database connection or any HTTP request failed (rows collected
        before the failure are still committed).
    """
    con = db.connect()
    if con is None:
        return False

    succeeded = True
    try:
        cursor = con.cursor()
        for page_no in range(1, 4):
            page_url = "https://www.toptal.com/developers/blog?page=" + str(page_no)
            try:
                # Without a timeout a stalled server would block forever.
                page = requests.get(page_url, timeout=30).content
            except requests.RequestException as e:
                print("Error : ", e)
                succeeded = False
                break
            soup = bs(page, "html.parser")
            # Walk each page bottom-up so its first card is inserted last.
            for card in reversed(soup.find_all('div', class_="blog_post_card-content")):
                try:
                    title = card.find('div', class_="blog_post_card__title").text.strip()
                    link = card.find('a', class_="blog_post_card__title-link")['href'].strip()
                    cursor.execute(
                        "Insert into content_agg(source,title, url) values('TopTal',?,?)",
                        (title, link),
                    )
                except sq.IntegrityError:
                    # Row already stored (primary-key clash); skip it.
                    pass
                except Exception as e:
                    print("Error : ", e)
        con.commit()
    finally:
        con.close()
    return succeeded
|
Binary file added
BIN
+935 Bytes
P13_ContentAggregator/ContentAggregator/ScapeWeb/__pycache__/AndroidPolice.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+1.05 KB
P13_ContentAggregator/ContentAggregator/ScapeWeb/__pycache__/TopTal.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+925 Bytes
P13_ContentAggregator/ContentAggregator/ScapeWeb/__pycache__/college.cpython-36.pyc
Binary file not shown.
29 changes: 29 additions & 0 deletions
29
P13_ContentAggregator/ContentAggregator/ScapeWeb/college.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import requests | ||
from bs4 import BeautifulSoup as bs | ||
import sqlite3 as sq | ||
|
||
|
||
def scapecollege(db):
    """Scrape the Careers360 engineering ranking into the ``college`` table.

    Args:
        db: Object exposing ``connectcollege()``, which returns a sqlite3
            connection or ``None``.

    Returns:
        True when the page was fetched and the inserts were committed;
        False when the HTTP request or the database connection failed.
    """
    url = "https://engineering.careers360.com/colleges/ranking"
    try:
        # Without a timeout a stalled server would block the caller forever.
        page = requests.get(url, timeout=30).content
    except requests.RequestException as e:
        print("Error : ", e)
        return False
    soup = bs(page, "html.parser")

    con = db.connectcollege()
    if con is None:
        return False
    try:
        cursor = con.cursor()
        # Walk the table bottom-up so the first listed college is inserted last.
        for cell in reversed(soup.find_all('td', class_="colgName")):
            try:
                title = cell.text.strip()
                link = cell.a['href'].strip()
                cursor.execute("Insert into college(title, url) values(?,?)", (title, link))
            except sq.IntegrityError:
                # Row already stored (primary-key clash); skip it.
                pass
            except Exception as e:
                print("Error : ", e)
        con.commit()
    finally:
        con.close()
    return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
from database import database as db | ||
import time | ||
from ScapeWeb import AndroidPolice, TopTal, college | ||
# Parallel lists matched by index: `source` holds the values stored in
# content_agg.source, `name` the dictionary key used for each source's rows.
source = ['AndroidPolice', 'TopTal']
name = list(source)
|
||
|
||
def scrapeAll():
    """Run every scraper once and report each one that signals failure."""
    jobs = (
        (AndroidPolice.scrape, "ERROR: AndroidPolice SCRAPE ERROR"),
        (TopTal.scrape, "ERROR : TopTal SCRAPE ERROR"),
        (college.scapecollege, "ERROR : 360Careers SCRAPE ERROR"),
    )
    for run_scraper, failure_message in jobs:
        succeeded = run_scraper(db)
        if not succeeded:
            print(failure_message)
    print("SCRAPING COMPLETE")
|
||
|
||
def getContent():
    """Return the stored rows of every source, newest row first.

    Returns:
        A dict mapping each display name in ``name`` to the list of rows
        for the matching entry in ``source``, or None when the database
        cannot be opened or any source has no rows yet.
    """
    conn = db.connect()
    if conn is None:
        return None
    try:
        cursor = conn.cursor()
        content = {}
        for label, src in zip(name, source):
            # Bind the source as a parameter instead of formatting it into SQL.
            rows = cursor.execute(
                "Select * from content_agg where source=? order by rowid desc;",
                (src,),
            ).fetchall()
            if not rows:
                return None
            content[label] = rows
        return content
    finally:
        # Runs on every exit path, including the early return and errors.
        conn.close()
|
||
|
||
def getContentForSource(s):
    """Return the stored rows of one source, newest row first.

    Args:
        s: Source identifier; must be one of the entries in ``source``.

    Returns:
        A one-entry dict mapping the source's display name to its rows, or
        None when ``s`` is unknown, the database cannot be opened, or the
        source has no rows.
    """
    if s not in source:
        return None
    conn = db.connect()
    if conn is None:
        return None
    try:
        # Bind the source as a parameter instead of formatting it into SQL.
        rows = conn.cursor().execute(
            "Select * from content_agg where source=? order by rowid desc;",
            (s,),
        ).fetchall()
    finally:
        conn.close()
    if not rows:
        return None
    return {name[source.index(s)]: rows}
|
||
|
||
def scrapeStart(interval=10):
    """Run ``scrapeAll`` forever, pausing between cycles.

    Args:
        interval: Seconds to sleep after each cycle (default 10, the
            original fixed delay).

    This function never returns.
    """
    while True:
        try:
            scrapeAll()
        except Exception as e:
            # One failed cycle (e.g. a network error) must not end the loop.
            print("Error : ", e)
        time.sleep(interval)
|
||
|
||
def getContentCollege():
    """Return the stored college rows, newest row first.

    Returns:
        ``{"Top Colleges": rows}``, or None when the database cannot be
        opened or the ``college`` table has no rows.
    """
    conn = db.connectcollege()
    if conn is None:
        return None
    try:
        rows = conn.cursor().execute("Select * from college order by rowid desc;").fetchall()
    finally:
        conn.close()
    if not rows:
        return None
    return {"Top Colleges": rows}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+912 Bytes
P13_ContentAggregator/ContentAggregator/__pycache__/AndroidPolice.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+1.84 KB
P13_ContentAggregator/ContentAggregator/__pycache__/ScrapeJob.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+1.03 KB
P13_ContentAggregator/ContentAggregator/__pycache__/TopTal.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+916 Bytes
P13_ContentAggregator/ContentAggregator/__pycache__/college.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+1.54 KB
P13_ContentAggregator/ContentAggregator/__pycache__/database.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+917 Bytes
P13_ContentAggregator/ContentAggregator/__pycache__/databasecollege.cpython-36.pyc
Binary file not shown.
Binary file added
BIN
+1.07 KB
P13_ContentAggregator/ContentAggregator/__pycache__/scrape.cpython-36.pyc
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+1.55 KB
P13_ContentAggregator/ContentAggregator/database/__pycache__/database.cpython-36.pyc
Binary file not shown.
50 changes: 50 additions & 0 deletions
50
P13_ContentAggregator/ContentAggregator/database/database.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import sqlite3 | ||
from sqlite3 import Error | ||
|
||
|
||
def connect():
    """Open ``content.db`` and make sure the ``content_agg`` table exists.

    Returns:
        The open sqlite3 connection, or None when opening the database
        raised a sqlite3 error (the error is printed).
    """
    sql_create_projects_table = "CREATE TABLE IF NOT EXISTS content_agg (source text NOT NULL,title text NOT NULL,url text NOT NULL, CONSTRAINT PK PRIMARY KEY(source,title,url));"
    try:
        # sqlite3.connect either returns a connection or raises, so no
        # separate None check is needed.
        conn = sqlite3.connect('content.db')
    except Error as e:
        print(e)
        return None
    create_table(conn, sql_create_projects_table)
    return conn
|
||
|
||
def create_table(conn, create_table_sql):
    """Execute a CREATE TABLE statement on ``conn`` and commit it.

    sqlite3 errors are printed rather than raised.

    Args:
        conn: Open sqlite3 connection.
        create_table_sql: The CREATE TABLE statement to run.
    """
    try:
        conn.cursor().execute(create_table_sql)
        conn.commit()
    except Error as err:
        print("Error :", err)
|
||
|
||
def connectcollege():
    """Open ``content.db`` and make sure the ``college`` table exists.

    Returns:
        The open sqlite3 connection, or None when opening the database
        raised a sqlite3 error (the error is printed).
    """
    sql_create_projects_table = "CREATE TABLE IF NOT EXISTS college (title text NOT NULL,url text NOT NULL, CONSTRAINT PK PRIMARY KEY(title,url));"
    try:
        # sqlite3.connect either returns a connection or raises, so no
        # separate None check is needed.
        conn = sqlite3.connect('content.db')
    except Error as e:
        print(e)
        return None
    create_table(conn, sql_create_projects_table)
    return conn
|
||
|
||
def create_tablecollege(conn, create_table_sql):
    """Run ``create_table_sql`` on ``conn`` and commit the change.

    A sqlite3 error is reported on stdout instead of being raised.

    Args:
        conn: Open sqlite3 connection.
        create_table_sql: The CREATE TABLE statement to execute.
    """
    try:
        cur = conn.cursor()
        cur.execute(create_table_sql)
    except Error as exc:
        print("Error :", exc)
    else:
        try:
            conn.commit()
        except Error as exc:
            print("Error :", exc)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* Page background and base typography. */
body {
    font-family: monospace, serif;
    background-image: linear-gradient(147deg, #000000 0%, #2c3e50 74%);
    height: 100%;
    width: 100%;
    z-index: 1;
    background-repeat: no-repeat;
    color: white;
}

/* Reset default spacing on every element. */
* {
    margin: 0px;
    padding: 0px;
}

/* Page title. */
h1 {
    padding: 1vw;
    text-align: center;
    text-transform: uppercase;
    font-size: 4vw;
    font-weight: bolder;
    width: 100%;
}

/* Flex container holding one .data column per source. */
.content {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-around;
    padding-top: 7vw;
}

.content h2 {
    font-size: 2vw;
    font-weight: bolder;
    margin: 1vw 1vw 2vw 0vw;
}

/* A single headline link: one line, truncated with an ellipsis. */
.contenta {
    display: block;
    text-decoration: none;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
    font-size: 1vw;
}

.contenta:hover {
    text-decoration: underline;
}

/* One source column. */
.data {
    width: 35%;
    margin: 2vw;
    text-overflow: ellipsis;
}

/* "Read More.." link under each column. */
.readmore {
    text-decoration: none;
    color: #c45454;
    font-weight: bold;
    font-size: 1.4vw;
}

.readmore:hover {
    text-decoration: underline;
}

a {
    text-decoration: none;
    color: white;
}

a.button {
    font-size: 1vw;
    text-decoration: none;
    color: white;
}
20 changes: 20 additions & 0 deletions
20
P13_ContentAggregator/ContentAggregator/templates/college.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {# content maps a heading to rows; row[0] is the title, row[1] the URL. #}
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for row in value %}
            {# href is quoted so a URL with spaces or quotes cannot break the tag. #}
            <a class='contenta' target="_blank" rel="noopener noreferrer" href="{{row[1]}}">{{loop.index}}. {{row[0]}}</a><br>
            {% endfor %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
28 changes: 28 additions & 0 deletions
28
P13_ContentAggregator/ContentAggregator/templates/index.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {# content maps a heading to rows; row[0] is the source, row[1] the title, row[2] the URL. #}
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {# Show at most the first ten rows of each source. #}
            {% for row in value[:10] %}
            {# href is quoted so a URL with spaces or quotes cannot break the tag. #}
            <a class='contenta' target="_blank" rel="noopener noreferrer" href="{{row[2]}}">{{row[1]}}</a><br>
            {% endfor %}
            <a class='readmore' href="{{url_for('.readmore',source=value[0][0])}}">Read More..</a>
        </div>
        {% endfor %}
    </div>
    <center>
        <div class="college">
            <h2>Top Colleges of India</h2>
            <a class="button" href="{{url_for('.college')}}">Visit Here!!</a>
        </div>
    </center>
</body>
</html>
20 changes: 20 additions & 0 deletions
20
P13_ContentAggregator/ContentAggregator/templates/readmore.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {# content maps a heading to rows; row[1] is the title, row[2] the URL. #}
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for row in value %}
            {# href is quoted so a URL with spaces or quotes cannot break the tag. #}
            <a class='contenta' target="_blank" rel="noopener noreferrer" href="{{row[2]}}">{{row[1]}}</a><br>
            {% endfor %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
Oops, something went wrong.