This repository has been archived by the owner on Jan 11, 2022. It is now read-only.

Added Python Package #9

Open · wants to merge 1 commit into base: master
107 changes: 107 additions & 0 deletions .gitignore
@@ -0,0 +1,107 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

.vscode
.DS_Store
5 changes: 5 additions & 0 deletions instagram_scraper/__init__.py
@@ -0,0 +1,5 @@
from .instagram_scraper import main
Owner:
Why do we need main here?


__name__ = "Instagram Scraper"
__author__ = "Meet Mangukiya <[email protected]>"
__version__ = '1.0.0'
Owner:
Not a stable API yet, 0.1.0 please.

Owner:
Import scrape_instagram_tag and scrape_instagram here; from instagram_scraper.instagram_scraper import ... is too long.
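
Taken together, the three comments above point at an __init__.py like the following. This is only a sketch of the requested change, not part of the diff: it assumes scrape_instagram and scrape_instagram_tag keep the names shown in instagram_scraper.py below, and it stops reassigning the module's __name__ (which setup.py currently uses as the distribution name, so setup.py would then need a literal name string).

# Sketch of the __init__.py the review is converging on.
# Re-export the scraping helpers so callers can write
# "from instagram_scraper import scrape_instagram_tag" directly.
from .instagram_scraper import scrape_instagram, scrape_instagram_tag

__author__ = "Meet Mangukiya <[email protected]>"
__version__ = '0.1.0'  # pre-1.0 while the API is still unstable, per the review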

14 changes: 10 additions & 4 deletions instagram_scraper.py → instagram_scraper/instagram_scraper.py
@@ -10,7 +10,6 @@
from requests_html import HTMLSession



# Source: http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
REGEXES = {
'hashtag': re.compile('(?:#)([A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\.(?!\.))){0,28}(?:[A-Za-z0-9_]))?)'),
@@ -61,13 +60,14 @@ def scrape_instagram(tags: List[str], total_count: int=50, existing: set=None):
for tag in tags:
yield from scrape_instagram_tag(tag, total_count)


def main(tags, total_count, should_continue):
def _single_tag_processing(tag, total_count, existing_links, start):
os.makedirs(f'data/{tag}', exist_ok=True)
with open(f'data/{tag}/data.csv', 'a' if existing_links else 'w', newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
for count, (url, caption, hashtags, mentions) in enumerate(scrape_instagram_tag(
tag, total_count, existing_links), start):
tag, total_count, existing_links), start):

try:
req = requests.get(url)
@@ -83,7 +83,8 @@ def _single_tag_processing(tag, total_count, existing_links, start):
', '.join(hashtags),
', '.join(mentions)
])
print(f'[{tag}] downloaded {url} as {count}.jpg in data/{tag}')
print(
f'[{tag}] downloaded {url} as {count}.jpg in data/{tag}')

for tag in tags:
existing_links = set()
@@ -96,7 +97,8 @@ def _single_tag_processing(tag, total_count, existing_links, start):
start = i + 1
_single_tag_processing(tag, total_count, existing_links, start)

if __name__ == '__main__':

def run():
parser = argparse.ArgumentParser()
parser.add_argument('--tags', '-t', nargs='+',
help='Tags to scrape images from')
@@ -111,3 +113,7 @@ def _single_tag_processing(tag, total_count, existing_links, start):
assert args.tags, "Enter tags to scrape! Use --tags option, see help."
assert args.count, "Enter total number of images to scrape using --count option, see help."
main(args.tags, args.count, args.cont)


if __name__ == '__main__':
run()
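
Since run() now owns all argument parsing, the scraper can also be driven programmatically through main(), whose signature is visible in the hunk above. A minimal sketch, assuming the package is installed and Instagram is reachable; the nature tag is an arbitrary example, and should_continue appears to control resuming an existing data/<tag>/data.csv (that hunk is collapsed here):

from instagram_scraper.instagram_scraper import main

# Download up to 20 posts tagged #nature into data/nature/,
# recording URL, caption, hashtags and mentions in data.csv.
main(tags=['nature'], total_count=20, should_continue=False)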
28 changes: 28 additions & 0 deletions setup.py
@@ -0,0 +1,28 @@
import setuptools
import instagram_scraper

with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name=instagram_scraper.__name__,
    version=instagram_scraper.__version__,
    author=instagram_scraper.__author__,
    author_email="[email protected]",
    description="Scrape the Instagram frontend",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/meetmangukiya/instagram-scraper",
    packages=setuptools.find_packages(),
    install_requires=['requests-html'],
    classifiers=(
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ),
    entry_points={
        'console_scripts': [
            'scrape-insta = instagram_scraper.instagram_scraper:run',
Owner:
prefer scrape-instagram

        ],
    }
)
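
For completeness, this is roughly how the new console script would be exercised once the package is installed. The --tags and --count spellings come from the argparse setup and asserts above; the command would be scrape-instagram instead if the reviewer's naming preference is adopted:

pip install .
scrape-insta --tags nature travel --count 50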