Skip to content

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
isumit19 authored Jul 6, 2019
1 parent a0f6335 commit 6c47fae
Show file tree
Hide file tree
Showing 24 changed files with 369 additions and 0 deletions.
2 changes: 2 additions & 0 deletions P13_ContentAggregator/ContentAggregator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# ContentAggregator
GDTC Hack_in
26 changes: 26 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScapeWeb/AndroidPolice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import requests
import sqlite3 as sq
from bs4 import BeautifulSoup as bs


def scrape(db):
    """Scrape Android Police front-page headlines into the content_agg table.

    Args:
        db: module exposing ``connect() -> sqlite3.Connection`` (see
            database.database).

    Returns:
        True once the page has been processed and the connection closed,
        False when no database connection could be obtained.
    """
    url = "https://www.androidpolice.com/"
    r = requests.get(url).content
    content = bs(r, "html.parser")
    con = db.connect()
    if con is None:
        return False
    try:
        cursor = con.cursor()  # one cursor reused for every insert
        # reversed() so the newest article ends up with the highest rowid
        for header in reversed(content.find_all('header', class_="post-header")):
            try:
                # second anchor in the header — presumably the title link;
                # TODO confirm against the live markup
                link = header.find_all('a')[1]
                cursor.execute(
                    "Insert into content_agg(source, title, url) values('AndroidPolice',?,?)",
                    (link.text.strip(), link['href'].strip()))
            except sq.IntegrityError:
                pass  # duplicate row (PK is source+title+url): already stored
            except Exception as e:
                # best-effort scraping: one malformed card must not abort the run
                print("Error : ", e)
        con.commit()
    finally:
        # fix: the original leaked the connection if commit (or the outer
        # parse loop) raised; always release it
        con.close()
    return True
28 changes: 28 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScapeWeb/TopTal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import requests
import sqlite3 as sq
from bs4 import BeautifulSoup as bs


def scrape(db):
    """Scrape pages 1-3 of the Toptal engineering blog into content_agg.

    Args:
        db: module exposing ``connect() -> sqlite3.Connection``.

    Returns:
        True once all three pages have been processed, False as soon as a
        database connection cannot be obtained.
    """
    for page_no in range(1, 4):
        page_url = "https://www.toptal.com/developers/blog?page=" + str(page_no)
        content = bs(requests.get(page_url).content, "html.parser")
        con = db.connect()
        if con is None:
            return False
        try:
            cursor = con.cursor()  # one cursor reused for every insert
            # reversed() so the newest article ends up with the highest rowid
            for card in reversed(content.find_all('div', class_="blog_post_card-content")):
                try:
                    title = card.find('div', class_="blog_post_card__title").text.strip()
                    # renamed from `url` — the original shadowed the page URL
                    link = card.find('a', class_="blog_post_card__title-link")['href'].strip()
                    cursor.execute(
                        "Insert into content_agg(source,title, url) values('TopTal',?,?)",
                        (title, link))
                except sq.IntegrityError:
                    pass  # duplicate row (PK is source+title+url): already stored
                except Exception as e:
                    print("Error : ", e)
            con.commit()
        finally:
            con.close()
    # bug fix: the original returned True inside the page loop, so only
    # page 1 was ever scraped; now we return after all pages are done
    return True

Binary file not shown.
Binary file not shown.
Binary file not shown.
29 changes: 29 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScapeWeb/college.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import requests
from bs4 import BeautifulSoup as bs
import sqlite3 as sq


def scapecollege(db):
    """Scrape the Careers360 engineering-college ranking into the college table.

    Args:
        db: module exposing ``connectcollege() -> sqlite3.Connection``.

    Returns:
        True after the rows have been committed, False when no database
        connection could be obtained.
    """
    listing = bs(
        requests.get("https://engineering.careers360.com/colleges/ranking").content,
        "html.parser")

    con = db.connectcollege()
    if con is None:
        return False

    # Insert oldest-first so the newest entry ends up with the highest rowid.
    for cell in reversed(listing.find_all('td', class_="colgName")):
        try:
            con.cursor().execute(
                "Insert into college(title, url) values(?,?)",
                (cell.text.strip(), cell.a['href'].strip()))
        except sq.IntegrityError:
            pass  # duplicate (title, url) pair — already stored
        except Exception as err:
            print("Error : ", err)
    con.commit()
    con.close()
    return True
66 changes: 66 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScrapeJob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from database import database as db
import time
from ScapeWeb import AndroidPolice, TopTal, college
source = ['AndroidPolice', 'TopTal']
name = ['AndroidPolice', 'TopTal']


def scrapeAll():
    """Run every registered scraper once, printing a message for any that fail."""
    jobs = [
        (AndroidPolice.scrape, "ERROR: AndroidPolice SCRAPE ERROR"),
        (TopTal.scrape, "ERROR : TopTal SCRAPE ERROR"),
        (college.scapecollege, "ERROR : 360Careers SCRAPE ERROR"),
    ]
    for run, failure_message in jobs:
        if not run(db):
            print(failure_message)
    print("SCRAPING COMPLETE")


def getContent():
    """Fetch every stored article, newest first, grouped by source.

    Returns:
        dict mapping display name -> list of (source, title, url) rows,
        or None when any source has no rows yet (scrape still pending).
    """
    content = {}
    conn = db.connect()
    try:
        c = conn.cursor()
        for j, i in enumerate(source):
            # fix: parameterized query instead of str.format — avoids SQL
            # injection and quoting bugs if source names ever change
            z = c.execute(
                "Select * from content_agg where source=? order by rowid desc;",
                (i,))
            content[name[j]] = z.fetchall()
            if not content[name[j]]:
                return None
        return content
    finally:
        # close on every exit path (the original repeated conn.close())
        conn.close()


def getContentForSource(s):
    """Fetch all stored rows for a single source, newest first.

    Args:
        s: source identifier; must be one of the module-level ``source`` list.

    Returns:
        dict of {display name: rows}, or None when ``s`` is unknown or has
        no rows stored yet.
    """
    # guard clause instead of wrapping the whole body in if/else
    if s not in source:
        return None
    i = source.index(s)
    conn = db.connect()
    try:
        # fix: parameterized query instead of str.format — ``s`` is caller
        # supplied, so the original was an SQL-injection hole
        z = conn.cursor().execute(
            "Select * from content_agg where source=? order by rowid desc;",
            (s,))
        rows = z.fetchall()
        if not rows:
            return None
        return {name[i]: rows}
    finally:
        conn.close()


def scrapeStart():
    """Run scrapeAll() forever, pausing between rounds.

    Intended to run on a background (daemon) thread — never returns.
    """
    pause_seconds = 10
    while True:
        scrapeAll()
        time.sleep(pause_seconds)


def getContentCollege():
    """Fetch the stored college ranking, newest row first.

    Returns:
        {"Top Colleges": rows} or None when the table is still empty.
    """
    conn = db.connectcollege()
    rows = conn.cursor().execute(
        "Select * from college order by rowid desc;").fetchall()
    conn.close()
    if not rows:
        return None
    return {"Top Colleges": rows}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
50 changes: 50 additions & 0 deletions P13_ContentAggregator/ContentAggregator/database/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import sqlite3
from sqlite3 import Error


def connect():
    """Open (creating on first use) content.db and ensure content_agg exists.

    Returns:
        sqlite3.Connection on success, or None when the database cannot be
        opened (the sqlite error is printed, matching the module's
        best-effort error handling).
    """
    try:
        conn = sqlite3.connect('content.db')
    except Error as e:
        print(e)
        return None
    # sqlite3.connect() raises on failure and never returns None, so the
    # original unreachable "conn is None" branch has been removed.
    sql_create_projects_table = "CREATE TABLE IF NOT EXISTS content_agg (source text NOT NULL,title text NOT NULL,url text NOT NULL, CONSTRAINT PK PRIMARY KEY(source,title,url));"
    create_table(conn, sql_create_projects_table)
    return conn


def create_table(conn, create_table_sql):
    """Execute a CREATE TABLE statement on ``conn`` and commit.

    Errors are printed rather than raised, matching the module's
    best-effort error handling.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(create_table_sql)
        conn.commit()
    except Error as err:
        print("Error :", err)


def connectcollege():
    """Open content.db and ensure the ``college`` table exists.

    Returns:
        sqlite3.Connection on success, or None on failure (error printed).
    """
    ddl = "CREATE TABLE IF NOT EXISTS college (title text NOT NULL,url text NOT NULL, CONSTRAINT PK PRIMARY KEY(title,url));"
    try:
        conn = sqlite3.connect('content.db')
        if conn is None:
            print("Error! cannot create the database connection.")
            return None
        create_table(conn, ddl)
        return conn
    except Error as e:
        print(e)
        return None


def create_tablecollege(conn, create_table_sql):
    """Execute a CREATE TABLE statement on ``conn`` and commit.

    Duplicate of create_table, kept so any existing callers keep working;
    errors are printed rather than raised.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(create_table_sql)
        conn.commit()
    except Error as err:
        print("Error :", err)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
66 changes: 66 additions & 0 deletions P13_ContentAggregator/ContentAggregator/static/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/* Dark-gradient theme for the Content Aggregator pages. */
body {
    font-family: monospace, serif;
    background-image: linear-gradient(147deg, #000000 0%, #2c3e50 74%);
    height: 100%;
    width: 100%;
    z-index: 1;
    background-repeat: no-repeat;
    color: white;
}

/* Global reset: remove default margins/padding everywhere. */
* {
    margin: 0px;
    padding: 0px;
}

/* Page title banner. */
h1 {
    padding: 1vw;
    text-align: center;
    text-transform: uppercase;
    font-size: 4vw;
    font-weight: bolder;
    width: 100%;
}

/* Flex container holding one .data column per source. */
.content {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-around;
    padding-top: 7vw;
}

.content h2 {
    font-size: 2vw;
    font-weight: bolder;
    margin: 1vw 1vw 2vw 0vw;
}

/* Individual article link: single line, ellipsized when too long. */
.contenta {
    display: block;
    text-decoration: none;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
    font-size: 1vw;
}

.contenta:hover {
    text-decoration: underline;
}

/* One source column. */
.data {
    width: 35%;
    margin: 2vw;
    text-overflow: ellipsis;
}

/* "Read More.." link at the bottom of each column. */
.readmore {
    text-decoration: none;
    color: #c45454;
    font-weight: bold;
    font-size: 1.4vw;
}

.readmore:hover {
    text-decoration: underline;
}

a {
    text-decoration: none;
    color: white;
}

/* "Visit Here!!" college-page link. */
a.button {
    font-size: 1vw;
    text-decoration: none;
    color: white;
}
20 changes: 20 additions & 0 deletions P13_ContentAggregator/ContentAggregator/templates/college.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for i in range(0,value|length) %}
            {# fix: href is now quoted — unquoted URLs break the attribute on spaces/quotes #}
            <a class='contenta' target="_blank" href="{{value[i][1]}}">{{i+1}}. {{value[i][0]}}</a><br>
            {% endfor %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
28 changes: 28 additions & 0 deletions P13_ContentAggregator/ContentAggregator/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for i in range(0,10) if i<value|length %}
            {# fix: href is now quoted — unquoted URLs break the attribute on spaces/quotes #}
            <a class='contenta' target="_blank" href="{{value[i][2]}}">{{value[i][1]}}</a><br>
            {% endfor %}
            <a class='readmore' href="{{url_for('.readmore',source=content[key][0][0])}}">Read More..</a>
        </div>
        {% endfor %}
    </div>
    <center>
        <div class="college">
            <h2>Top Colleges of India</h2>
            <a class="button" href="{{url_for('.college')}}">Visit Here!!</a>
        </div> <!-- fix: this div was never closed in the original -->
    </center>
</body>
</html>
20 changes: 20 additions & 0 deletions P13_ContentAggregator/ContentAggregator/templates/readmore.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for i in range(0,value|length) %}
            {# fix: href is now quoted — unquoted URLs break the attribute on spaces/quotes #}
            <a class='contenta' target="_blank" href="{{value[i][2]}}">{{value[i][1]}}</a><br>
            {% endfor %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
34 changes: 34 additions & 0 deletions P13_ContentAggregator/ContentAggregator/webApp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from flask import Flask, render_template, redirect
import ScrapeJob as sj
import threading

# Flask application serving the content that ScrapeJob gathers in the background.
app = Flask(__name__)


@app.route('/', methods=["GET"])
def home():
    """Render the front page with articles from every source.

    Shows a plain retry message until the background scraper has
    populated the database.
    """
    data = sj.getContent()
    if data is None:
        return "FETCHING DATA PLEASE TRY AGAIN LATER !"
    return render_template('index.html', content=data)


@app.route('/readmore/<source>', methods=["GET"])
def readmore(source):
    """Render every stored article for one source; unknown sources go to /404."""
    rows = sj.getContentForSource(source)
    if rows is None:
        return redirect('/404')
    return render_template('readmore.html', content=rows)

@app.route('/college/', methods=["GET"])
def college():
    """Render the scraped college-ranking list."""
    return render_template('college.html', content=sj.getContentCollege())



if __name__ == '__main__':
    # Background scraper keeps refreshing the database while the app serves.
    t1 = threading.Thread(target=sj.scrapeStart)
    t1.daemon = True  # allow the process to exit without joining the scraper
    t1.start()
    # NOTE(review): debug=True enables the Werkzeug reloader, which re-runs
    # this module in a child process and so appears to start a second scraper
    # thread — confirm this is intended, or pass use_reloader=False.
    app.run(debug=True)

0 comments on commit 6c47fae

Please sign in to comment.