From 493558cb2aeac1879fd3e9289a5fec8bb00d9218 Mon Sep 17 00:00:00 2001
From: Midnight <etienne.berube@ineat.ca>
Date: Wed, 3 Jul 2019 14:31:45 -0400
Subject: [PATCH 1/3] Fixed one test and used an env var for the google cloud
 bucket

The bucket name is now read from an environment variable, because to_text() is only given one argument when it is called from extract_data.
---
 src/invoice2data/input/gvision.py | 10 +++++++---
 tests/test_cli.py                 |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/invoice2data/input/gvision.py b/src/invoice2data/input/gvision.py
index cfb48b49..93d6b5e0 100644
--- a/src/invoice2data/input/gvision.py
+++ b/src/invoice2data/input/gvision.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
+def to_text(path, language='fr'):
     """Sends PDF files to Google Cloud Vision for OCR.
 
     Before using invoice2data, make sure you have the auth json path set as
@@ -9,8 +9,6 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
     ----------
     path : str
         path of electronic invoice in JPG or PNG format
-    bucket_name : str
-        name of bucket to use for file storage and results cache.
 
     Returns
     -------
@@ -27,6 +25,12 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
 
     # Supported mime_types are: 'application/pdf' and 'image/tiff'
     mime_type = 'application/pdf'
+    bucket_name = os.getenv('GOOGLE_CLOUD_BUCKET_NAME', None)
+
+    if bucket_name is None:
+        raise EnvironmentError(
+            'GOOGLE_CLOUD_BUCKET_NAME environment variable not set'
+        )
 
     path_dir, filename = os.path.split(path)
     result_blob_basename = filename.replace('.pdf', '').replace('.PDF', '')
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 40f908f7..0741185f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -133,7 +133,7 @@ def test_copy(self):
                     i += 1
 
         shutil.rmtree('tests/copy_test/', ignore_errors=True)
-        self.assertEqual(i, len(get_sample_files('.json')))
+        self.assertEqual(i, len(get_sample_files('.pdf')))
         '''
         if i != len(self._get_test_file_json_path()):
             print(i)

From 97ac5891a8e703efa22659ff6a61304cafb95414 Mon Sep 17 00:00:00 2001
From: Midnight <etienne.berube@ineat.ca>
Date: Wed, 3 Jul 2019 14:46:37 -0400
Subject: [PATCH 2/3] Updated README regarding changes with google cloud bucket
 name

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 078aa1e1..a159ebdb 100644
--- a/README.rst
+++ b/README.rst
@@ -67,7 +67,7 @@ Choose any of the following input readers:
  - tesseract ``invoice2data --input-reader tesseract invoice.pdf``
  - pdf miner ``invoice2data --input-reader pdfminer invoice.pdf``
  - tesseract4 ``invoice2data --input-reader tesseract4 invoice.pdf``
- - gvision ``invoice2data --input-reader gvision invoice.pdf`` (needs ``GOOGLE_APPLICATION_CREDENTIALS`` env var)
+ - gvision ``invoice2data --input-reader gvision invoice.pdf`` (needs ``GOOGLE_APPLICATION_CREDENTIALS`` and ``GOOGLE_CLOUD_BUCKET_NAME`` env var)
 
 Choose any of the following output formats:
 

From 27c27c8e8025847e105f67d48c5efc622aa35009 Mon Sep 17 00:00:00 2001
From: Midnight <etienne.berube@ineat.ca>
Date: Mon, 15 Jul 2019 10:55:51 -0400
Subject: [PATCH 3/3] Clarified README and re-added the bucket name as an
 argument

---
 README.rst                        |  2 +-
 src/invoice2data/input/gvision.py | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/README.rst b/README.rst
index a159ebdb..3cbd8585 100644
--- a/README.rst
+++ b/README.rst
@@ -67,7 +67,7 @@ Choose any of the following input readers:
  - tesseract ``invoice2data --input-reader tesseract invoice.pdf``
  - pdf miner ``invoice2data --input-reader pdfminer invoice.pdf``
  - tesseract4 ``invoice2data --input-reader tesseract4 invoice.pdf``
- - gvision ``invoice2data --input-reader gvision invoice.pdf`` (needs ``GOOGLE_APPLICATION_CREDENTIALS`` and ``GOOGLE_CLOUD_BUCKET_NAME`` env var)
+ - gvision ``invoice2data --input-reader gvision invoice.pdf`` (needs the ``GOOGLE_APPLICATION_CREDENTIALS`` env var and a Google Cloud bucket name; the bucket name can be passed as the ``bucket_name`` argument of the function ``to_text`` or set in an environment variable named ``GOOGLE_CLOUD_BUCKET_NAME``)
 
 Choose any of the following output formats:
 
diff --git a/src/invoice2data/input/gvision.py b/src/invoice2data/input/gvision.py
index 93d6b5e0..cb75d5c7 100644
--- a/src/invoice2data/input/gvision.py
+++ b/src/invoice2data/input/gvision.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-def to_text(path, language='fr'):
+def to_text(path, bucket_name=None, language='fr'):
     """Sends PDF files to Google Cloud Vision for OCR.
 
     Before using invoice2data, make sure you have the auth json path set as
@@ -9,6 +9,8 @@ def to_text(path, language='fr'):
     ----------
     path : str
         path of electronic invoice in JPG or PNG format
+    bucket_name : str
+        name of bucket to use for file storage and results cache.
 
     Returns
     -------
@@ -25,12 +27,14 @@ def to_text(path, language='fr'):
 
     # Supported mime_types are: 'application/pdf' and 'image/tiff'
     mime_type = 'application/pdf'
-    bucket_name = os.getenv('GOOGLE_CLOUD_BUCKET_NAME', None)
 
     if bucket_name is None:
-        raise EnvironmentError(
-            'GOOGLE_CLOUD_BUCKET_NAME environment variable not set'
-        )
+        bucket_name = os.getenv('GOOGLE_CLOUD_BUCKET_NAME', None)
+
+        if bucket_name is None:
+            raise EnvironmentError(
+                'No Google Cloud Bucket name set.\n Set it as an input variable or as an environment variable named GOOGLE_CLOUD_BUCKET_NAME'
+            )
 
     path_dir, filename = os.path.split(path)
     result_blob_basename = filename.replace('.pdf', '').replace('.PDF', '')