Skip to content

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
isumit19 authored Jul 6, 2019
1 parent a0f6335 commit 6c47fae
Show file tree
Hide file tree
Showing 24 changed files with 369 additions and 0 deletions.
2 changes: 2 additions & 0 deletions P13_ContentAggregator/ContentAggregator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# ContentAggregator
GDTC Hack_in
26 changes: 26 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScapeWeb/AndroidPolice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import requests
import sqlite3 as sq
from bs4 import BeautifulSoup as bs


def scrape(db):
    """Scrape Android Police front-page headlines into the content_agg table.

    Args:
        db: module exposing ``connect() -> sqlite3.Connection`` (see
            database.database).

    Returns:
        True once the page has been processed and the connection closed,
        False when no database connection could be obtained.
    """
    url = "https://www.androidpolice.com/"
    r = requests.get(url).content
    content = bs(r, "html.parser")
    con = db.connect()
    if con is None:
        return False
    try:
        cursor = con.cursor()  # one cursor reused for every insert
        # reversed() so the newest article ends up with the highest rowid
        for header in reversed(content.find_all('header', class_="post-header")):
            try:
                # second anchor in the header — presumably the title link;
                # TODO confirm against the live markup
                link = header.find_all('a')[1]
                cursor.execute(
                    "Insert into content_agg(source, title, url) values('AndroidPolice',?,?)",
                    (link.text.strip(), link['href'].strip()))
            except sq.IntegrityError:
                pass  # duplicate row (PK is source+title+url): already stored
            except Exception as e:
                # best-effort scraping: one malformed card must not abort the run
                print("Error : ", e)
        con.commit()
    finally:
        # fix: the original leaked the connection if commit (or the outer
        # parse loop) raised; always release it
        con.close()
    return True
28 changes: 28 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScapeWeb/TopTal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import requests
import sqlite3 as sq
from bs4 import BeautifulSoup as bs


def scrape(db):
    """Scrape pages 1-3 of the Toptal engineering blog into content_agg.

    Args:
        db: module exposing ``connect() -> sqlite3.Connection``.

    Returns:
        True once all three pages have been processed, False as soon as a
        database connection cannot be obtained.
    """
    for page_no in range(1, 4):
        page_url = "https://www.toptal.com/developers/blog?page=" + str(page_no)
        content = bs(requests.get(page_url).content, "html.parser")
        con = db.connect()
        if con is None:
            return False
        try:
            cursor = con.cursor()  # one cursor reused for every insert
            # reversed() so the newest article ends up with the highest rowid
            for card in reversed(content.find_all('div', class_="blog_post_card-content")):
                try:
                    title = card.find('div', class_="blog_post_card__title").text.strip()
                    # renamed from `url` — the original shadowed the page URL
                    link = card.find('a', class_="blog_post_card__title-link")['href'].strip()
                    cursor.execute(
                        "Insert into content_agg(source,title, url) values('TopTal',?,?)",
                        (title, link))
                except sq.IntegrityError:
                    pass  # duplicate row (PK is source+title+url): already stored
                except Exception as e:
                    print("Error : ", e)
            con.commit()
        finally:
            con.close()
    # bug fix: the original returned True inside the page loop, so only
    # page 1 was ever scraped; now we return after all pages are done
    return True

Binary file not shown.
Binary file not shown.
Binary file not shown.
29 changes: 29 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScapeWeb/college.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import requests
from bs4 import BeautifulSoup as bs
import sqlite3 as sq


def scapecollege(db):
    """Scrape the Careers360 engineering-college ranking into the college table.

    Args:
        db: module exposing ``connectcollege() -> sqlite3.Connection``.

    Returns:
        True after the rows have been committed, False when no database
        connection could be obtained.
    """
    listing = bs(
        requests.get("https://engineering.careers360.com/colleges/ranking").content,
        "html.parser")

    con = db.connectcollege()
    if con is None:
        return False

    # Insert oldest-first so the newest entry ends up with the highest rowid.
    for cell in reversed(listing.find_all('td', class_="colgName")):
        try:
            con.cursor().execute(
                "Insert into college(title, url) values(?,?)",
                (cell.text.strip(), cell.a['href'].strip()))
        except sq.IntegrityError:
            pass  # duplicate (title, url) pair — already stored
        except Exception as err:
            print("Error : ", err)
    con.commit()
    con.close()
    return True
66 changes: 66 additions & 0 deletions P13_ContentAggregator/ContentAggregator/ScrapeJob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from database import database as db
import time
from ScapeWeb import AndroidPolice, TopTal, college
source = ['AndroidPolice', 'TopTal']
name = ['AndroidPolice', 'TopTal']


def scrapeAll():
    """Run every registered scraper once, printing a message for any that fail."""
    jobs = [
        (AndroidPolice.scrape, "ERROR: AndroidPolice SCRAPE ERROR"),
        (TopTal.scrape, "ERROR : TopTal SCRAPE ERROR"),
        (college.scapecollege, "ERROR : 360Careers SCRAPE ERROR"),
    ]
    for run, failure_message in jobs:
        if not run(db):
            print(failure_message)
    print("SCRAPING COMPLETE")


def getContent():
    """Fetch every stored article, newest first, grouped by source.

    Returns:
        dict mapping display name -> list of (source, title, url) rows,
        or None when any source has no rows yet (scrape still pending).
    """
    content = {}
    conn = db.connect()
    try:
        c = conn.cursor()
        for j, i in enumerate(source):
            # fix: parameterized query instead of str.format — avoids SQL
            # injection and quoting bugs if source names ever change
            z = c.execute(
                "Select * from content_agg where source=? order by rowid desc;",
                (i,))
            content[name[j]] = z.fetchall()
            if not content[name[j]]:
                return None
        return content
    finally:
        # close on every exit path (the original repeated conn.close())
        conn.close()


def getContentForSource(s):
    """Fetch all stored rows for a single source, newest first.

    Args:
        s: source identifier; must be one of the module-level ``source`` list.

    Returns:
        dict of {display name: rows}, or None when ``s`` is unknown or has
        no rows stored yet.
    """
    # guard clause instead of wrapping the whole body in if/else
    if s not in source:
        return None
    i = source.index(s)
    conn = db.connect()
    try:
        # fix: parameterized query instead of str.format — ``s`` is caller
        # supplied, so the original was an SQL-injection hole
        z = conn.cursor().execute(
            "Select * from content_agg where source=? order by rowid desc;",
            (s,))
        rows = z.fetchall()
        if not rows:
            return None
        return {name[i]: rows}
    finally:
        conn.close()


def scrapeStart():
    """Run scrapeAll() forever, pausing between rounds.

    Intended to run on a background (daemon) thread — never returns.
    """
    pause_seconds = 10
    while True:
        scrapeAll()
        time.sleep(pause_seconds)


def getContentCollege():
    """Fetch the stored college ranking, newest row first.

    Returns:
        {"Top Colleges": rows} or None when the table is still empty.
    """
    conn = db.connectcollege()
    rows = conn.cursor().execute(
        "Select * from college order by rowid desc;").fetchall()
    conn.close()
    if not rows:
        return None
    return {"Top Colleges": rows}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
50 changes: 50 additions & 0 deletions P13_ContentAggregator/ContentAggregator/database/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import sqlite3
from sqlite3 import Error


def connect():
    """Open (creating on first use) content.db and ensure content_agg exists.

    Returns:
        sqlite3.Connection on success, or None when the database cannot be
        opened (the sqlite error is printed, matching the module's
        best-effort error handling).
    """
    try:
        conn = sqlite3.connect('content.db')
    except Error as e:
        print(e)
        return None
    # sqlite3.connect() raises on failure and never returns None, so the
    # original unreachable "conn is None" branch has been removed.
    sql_create_projects_table = "CREATE TABLE IF NOT EXISTS content_agg (source text NOT NULL,title text NOT NULL,url text NOT NULL, CONSTRAINT PK PRIMARY KEY(source,title,url));"
    create_table(conn, sql_create_projects_table)
    return conn


def create_table(conn, create_table_sql):
    """Execute a CREATE TABLE statement on ``conn`` and commit.

    Errors are printed rather than raised, matching the module's
    best-effort error handling.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(create_table_sql)
        conn.commit()
    except Error as err:
        print("Error :", err)


def connectcollege():
    """Open content.db and ensure the ``college`` table exists.

    Returns:
        sqlite3.Connection on success, or None on failure (error printed).
    """
    ddl = "CREATE TABLE IF NOT EXISTS college (title text NOT NULL,url text NOT NULL, CONSTRAINT PK PRIMARY KEY(title,url));"
    try:
        conn = sqlite3.connect('content.db')
        if conn is None:
            print("Error! cannot create the database connection.")
            return None
        create_table(conn, ddl)
        return conn
    except Error as e:
        print(e)
        return None


def create_tablecollege(conn, create_table_sql):
    """Execute a CREATE TABLE statement on ``conn`` and commit.

    Duplicate of create_table, kept so any existing callers keep working;
    errors are printed rather than raised.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(create_table_sql)
        conn.commit()
    except Error as err:
        print("Error :", err)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
66 changes: 66 additions & 0 deletions P13_ContentAggregator/ContentAggregator/static/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/* Dark-gradient theme for the Content Aggregator pages. */
body {
    font-family: monospace, serif;
    background-image: linear-gradient(147deg, #000000 0%, #2c3e50 74%);
    height: 100%;
    width: 100%;
    z-index: 1;
    background-repeat: no-repeat;
    color: white;
}

/* Global reset: remove default margins/padding everywhere. */
* {
    margin: 0px;
    padding: 0px;
}

/* Page title banner. */
h1 {
    padding: 1vw;
    text-align: center;
    text-transform: uppercase;
    font-size: 4vw;
    font-weight: bolder;
    width: 100%;
}

/* Flex container holding one .data column per source. */
.content {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-around;
    padding-top: 7vw;
}

.content h2 {
    font-size: 2vw;
    font-weight: bolder;
    margin: 1vw 1vw 2vw 0vw;
}

/* Individual article link: single line, ellipsized when too long. */
.contenta {
    display: block;
    text-decoration: none;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
    font-size: 1vw;
}

.contenta:hover {
    text-decoration: underline;
}

/* One source column. */
.data {
    width: 35%;
    margin: 2vw;
    text-overflow: ellipsis;
}

/* "Read More.." link at the bottom of each column. */
.readmore {
    text-decoration: none;
    color: #c45454;
    font-weight: bold;
    font-size: 1.4vw;
}

.readmore:hover {
    text-decoration: underline;
}

a {
    text-decoration: none;
    color: white;
}

/* "Visit Here!!" college-page link. */
a.button {
    font-size: 1vw;
    text-decoration: none;
    color: white;
}
20 changes: 20 additions & 0 deletions P13_ContentAggregator/ContentAggregator/templates/college.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for i in range(0,value|length) %}
            {# fix: href is now quoted — unquoted URLs break the attribute on spaces/quotes #}
            <a class='contenta' target="_blank" href="{{value[i][1]}}">{{i+1}}. {{value[i][0]}}</a><br>
            {% endfor %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
28 changes: 28 additions & 0 deletions P13_ContentAggregator/ContentAggregator/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for i in range(0,10) if i<value|length %}
            {# fix: href is now quoted — unquoted URLs break the attribute on spaces/quotes #}
            <a class='contenta' target="_blank" href="{{value[i][2]}}">{{value[i][1]}}</a><br>
            {% endfor %}
            <a class='readmore' href="{{url_for('.readmore',source=content[key][0][0])}}">Read More..</a>
        </div>
        {% endfor %}
    </div>
    <center>
        <div class="college">
            <h2>Top Colleges of India</h2>
            <a class="button" href="{{url_for('.college')}}">Visit Here!!</a>
        </div> <!-- fix: this div was never closed in the original -->
    </center>
</body>
</html>
20 changes: 20 additions & 0 deletions P13_ContentAggregator/ContentAggregator/templates/readmore.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<head>
    <title>Content Aggregator</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Content Aggregator</h1>
    <div class='content'>
        {% for key, value in content.items() %}
        <div class='data'>
            <h2>{{key}}</h2>
            {% for i in range(0,value|length) %}
            {# fix: href is now quoted — unquoted URLs break the attribute on spaces/quotes #}
            <a class='contenta' target="_blank" href="{{value[i][2]}}">{{value[i][1]}}</a><br>
            {% endfor %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
34 changes: 34 additions & 0 deletions P13_ContentAggregator/ContentAggregator/webApp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from flask import Flask, render_template, redirect
import ScrapeJob as sj
import threading

# Flask application serving the content that ScrapeJob gathers in the background.
app = Flask(__name__)


@app.route('/', methods=["GET"])
def home():
    """Render the front page with articles from every source.

    Shows a plain retry message until the background scraper has
    populated the database.
    """
    data = sj.getContent()
    if data is None:
        return "FETCHING DATA PLEASE TRY AGAIN LATER !"
    return render_template('index.html', content=data)


@app.route('/readmore/<source>', methods=["GET"])
def readmore(source):
    """Render every stored article for one source; unknown sources go to /404."""
    rows = sj.getContentForSource(source)
    if rows is None:
        return redirect('/404')
    return render_template('readmore.html', content=rows)

@app.route('/college/', methods=["GET"])
def college():
    """Render the scraped college-ranking list."""
    return render_template('college.html', content=sj.getContentCollege())



if __name__ == '__main__':
    # Background scraper keeps refreshing the database while the app serves.
    t1 = threading.Thread(target=sj.scrapeStart)
    t1.daemon = True  # allow the process to exit without joining the scraper
    t1.start()
    # NOTE(review): debug=True enables the Werkzeug reloader, which re-runs
    # this module in a child process and so appears to start a second scraper
    # thread — confirm this is intended, or pass use_reloader=False.
    app.run(debug=True)

0 comments on commit 6c47fae

Please sign in to comment.