
Commit fd69be1

database migration on fly fixed
mhasan502 committed Jan 16, 2024
1 parent 4d3b520 commit fd69be1
Showing 4 changed files with 37 additions and 21 deletions.
Dockerfile (4 changes: 3 additions & 1 deletion)
@@ -18,7 +18,9 @@ RUN set -ex && \
 
 COPY . /code/
 
-RUN python manage.py collectstatic --noinput
+RUN python manage.py makemigrations && \
+    python manage.py migrate && \
+    python manage.py collectstatic --noinput
 
 EXPOSE 8000
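A note on this fix: commands in a Dockerfile RUN step execute at image build time, so migrate here can only act on a database that exists inside the image (for example a bundled SQLite file, which the commit message suggests is the setup). With an external database the build machine typically has no access to it, and the usual Fly.io pattern is a deploy-time release command in fly.toml, e.g. release_command = "python manage.py migrate" under the [deploy] section. Running makemigrations at build time is also worth flagging: migration files are conventionally generated during development and committed, so that migrate applies a known, reviewed set.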
Somachar/settings.py (1 change: 1 addition & 0 deletions)
@@ -46,6 +46,7 @@
     'django.contrib.auth.middleware.AuthenticationMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'django.middleware.clickjacking.XFrameOptionsMiddleware',
+    'allauth.account.middleware.AccountMiddleware',
     'whitenoise.middleware.WhiteNoiseMiddleware',
 ]

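Two notes on the middleware list. The AccountMiddleware entry is required by django-allauth from 0.56 onward (startup fails with ImproperlyConfigured without it), which is what the bump to 0.60.1 in requirements.txt below forces. Separately, WhiteNoise's documentation recommends placing WhiteNoiseMiddleware directly after Django's SecurityMiddleware, near the top of the list, rather than last. A sketch of that ordering, assuming the standard Django default entries for the lines not visible in this hunk:

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'whitenoise.middleware.WhiteNoiseMiddleware',    # WhiteNoise docs: directly after SecurityMiddleware
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'allauth.account.middleware.AccountMiddleware',  # required by django-allauth >= 0.56
]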
news/scrape.py (45 changes: 29 additions & 16 deletions)
@@ -1,14 +1,17 @@
 import timeit
 import threading
+import traceback
 
 import requests
 from bs4 import BeautifulSoup
-from news.models import News
+#from news.models import News
 
 
 # checking if that news link exists on database
 def CheckIfExist(news_link):
-    num_of_news = News.objects.filter(newslink=news_link).count()
-    return num_of_news
+    # num_of_news = News.objects.filter(newslink=news_link).count()
+    # return num_of_news
+    return 0
 
 
 # Main news page to bring more news
@@ -31,9 +34,10 @@ def CollectLinks(soup, find_class, name):
 
 # save to database
 def SaveToDB(head, image_link, news_link, desc, name):
-    if desc != '' and len(head) < 90:
-        news = News(heading=head, imagelink=image_link, newslink=news_link, details=desc, papername=name)
-        news.save()
+    # if desc != '' and len(head) < 90:
+    #     news = News(heading=head, imagelink=image_link, newslink=news_link, details=desc, papername=name)
+    #     news.save()
+    pass
 
 
 # web scraping Jugantor
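With the News import commented out, CheckIfExist() always reports 0 and SaveToDB() becomes a no-op, so every link is re-fetched and nothing is persisted; this reads as a temporary measure to let the scraper run without the database. A sketch of keeping the module importable outside Django while restoring persistence when the app is configured (assuming the model fields shown in the commented-out code above):

try:
    from news.models import News           # resolves only under a configured Django app
    HAVE_DB = True
except Exception:                          # e.g. ImproperlyConfigured when run standalone
    HAVE_DB = False


def CheckIfExist(news_link):
    if not HAVE_DB:
        return 0                           # treat every link as new
    return News.objects.filter(newslink=news_link).count()


def SaveToDB(head, image_link, news_link, desc, name):
    if HAVE_DB and desc != '' and len(head) < 90:
        News(heading=head, imagelink=image_link, newslink=news_link,
             details=desc, papername=name).save()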
@@ -88,13 +92,13 @@ def Samakal():
                 head_div = soup.find('h1', {'class': 'detail-headline'})
                 head = head_div.getText()
 
-                image_div = soup.find('div', {'class': 'lightgallery'})
-                image = image_div.find('img', {'class': 'img-responsive'})
+                image_div = soup.find('div', {'class': 'image-container image rel-soci'})
+                image = image_div.find('img', {'class': None})
                 image_link = image.get('src')
 
                 desc = ''
                 body = soup.find('div', {'class': 'description'})
-                for i in body.find_all('span'):
+                for i in body.find_all('p'):
                     desc += i.getText().replace("\n", "")
 
                 SaveToDB(head, image_link, news_link, desc, name)
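The Samakal selectors were evidently updated for a site redesign. Since soup.find() returns None when a class no longer matches, the chained .find()/.get() calls will raise AttributeError on the next markup change. A null-safe variant of the image lookup, as a sketch rather than what the commit ships:

image_div = soup.find('div', {'class': 'image-container image rel-soci'})
image = image_div.find('img', {'class': None}) if image_div else None
image_link = image.get('src') if image else ''   # fall back to an empty link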
@@ -114,17 +118,21 @@ def Ittefaq():
 
     while len(links) > 0:
         news_link = links.pop()
+        if news_link[0] == '/' and news_link[1] == '/':
+            news_link = "https:" + news_link
         try:
             if CheckIfExist(news_link) == 0:
                 news_url = requests.get(news_link)
                 soup = BeautifulSoup(news_url.text, 'html.parser')
 
-                head_div = soup.find('div', {'id': 'dtl_hl_block'})
+                head_div = soup.find('h1', {'class': 'title mb10'})
                 head = head_div.getText()
 
-                image_div = soup.find('div', {'id': 'dtl_img_block'})
-                image = image_div.find('img')
+                image_div = soup.find('div', {'class': 'featured_image'})
+                print(image_div)
+                image = image_div.find('a', {'class': 'jw_media_holder media_image alignfull pop-media-holder pop-active'})
                 image_link = "https://www.ittefaq.com.bd" + image.get('src')
+                print(image_link)
 
                 desc = ''
                 body = soup.find('div', {'id': 'dtl_content_block'})
@@ -135,6 +143,7 @@
             else:
                 break
         except Exception:
+            traceback.print_exc()
             continue
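The new two-character test normalizes protocol-relative links (//www.ittefaq.com.bd/...). Because it runs before the try block, a link shorter than two characters would raise an uncaught IndexError; str.startswith() expresses the same check safely:

if news_link.startswith('//'):             # protocol-relative link
    news_link = "https:" + news_link

The print() calls and traceback.print_exc() read as debugging aids left in place. Note too that the element fetched is an <a> tag, which normally carries href rather than src, so image.get('src') can come back None; the prints were presumably added to check exactly that.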


@@ -143,13 +152,17 @@ def Scrape():
     start = timeit.default_timer()
 
     print("______________Initialized Scrape_________________")
-    p1 = threading.Thread(target=Jugantor())
-    p2 = threading.Thread(target=Samakal())
+    # p1 = threading.Thread(target=Jugantor())
+    # p2 = threading.Thread(target=Samakal())
     p3 = threading.Thread(target=Ittefaq())
 
-    p1.start()
-    p2.start()
+    # p1.start()
+    # p2.start()
     p3.start()
 
     stop = timeit.default_timer()
     print('Time: ', stop - start)
 
 
 if __name__ == '__main__':
     Scrape()
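One caveat on the threading here, present before this commit as well: threading.Thread(target=Ittefaq()) calls Ittefaq() immediately on the main thread and passes its return value, None, as the target, so the started thread does nothing. The timer still covers the scrape, but only because everything actually ran synchronously. A sketch of the presumably intended version, which passes the callable and joins before reading the timer:

p3 = threading.Thread(target=Ittefaq)      # no parentheses: pass the function itself
p3.start()
p3.join()                                  # wait for the scrape before stopping the timer

stop = timeit.default_timer()
print('Time: ', stop - start)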
requirements.txt (8 changes: 4 additions & 4 deletions)
@@ -1,6 +1,6 @@
 beautifulsoup4==4.12.2
-Django==4.2.1
-django-allauth==0.54.0
+Django==5.0.1
+django-allauth==0.60.1
 djangorestframework==3.14.0
-gunicorn==20.1.0
-whitenoise==6.4.0
+gunicorn==21.2.0
+whitenoise==6.6.0
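The Django 4.2.1 to 5.0.1 jump raises the floor to Python 3.10 (Django 5.0 supports Python 3.10 through 3.12), so the image's base interpreter, which this diff doesn't show, must satisfy that. The django-allauth bump to 0.60.1 is what makes the AccountMiddleware addition in Somachar/settings.py above mandatory.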
