Upgrade logging #113

Open · wants to merge 15 commits into main
137 changes: 128 additions & 9 deletions megalista_dataflow/config/logging.py
@@ -14,15 +14,18 @@

import logging
import sys
import io
import traceback
from types import FrameType
from typing import Optional, Tuple, List, Any

from models.execution import Execution

class LoggingConfig:
    @staticmethod
    def config_logging(show_lines: bool = False):
        # If there is a FileHandler, the execution is running on Dataflow.
        # In this scenario, we shouldn't change the formatter.
        file_handler = LoggingConfig.get_file_handler()
        if file_handler is None:
            log_format = "[%(levelname)s] %(name)s: %(message)s"
@@ -36,9 +39,7 @@ def config_logging(show_lines: bool = False):
        stream_handler = logging.StreamHandler(stream=sys.stderr)
        logging.getLogger().addHandler(stream_handler)
        stream_handler.setFormatter(formatter)

        logging.getLogger().setLevel(logging.ERROR)
        logging.getLogger("megalista").setLevel(logging.INFO)

@@ -50,10 +51,9 @@ def get_stream_handler():
    def get_file_handler():
        return LoggingConfig.get_handler(logging.FileHandler)

    @staticmethod
    def get_logging_handler():
        return None

    @staticmethod
    def get_handler(type: type):
@@ -63,4 +63,123 @@ def get_handler(type: type):
                result_handler = handler
                break

        return result_handler
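LoggingConfig.get_handler returns the first handler of the requested type registered on the root logger; the loop header is collapsed above, so this sketch assumes it iterates logging.getLogger().handlers (import path assumed from the file's location):

import logging
from config.logging import LoggingConfig

logging.getLogger().addHandler(logging.FileHandler('/tmp/megalista.log'))
file_handler = LoggingConfig.get_file_handler()
assert isinstance(file_handler, logging.FileHandler)  # found on the root logger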

class _LogWrapper:
    """Drop-in logger wrapper that records ERROR+ events for end-of-run reporting."""

    def __init__(self, name: Optional[str]):
        self._name = str(name)
        self._logger = logging.getLogger(name)

    def debug(self, msg: str, *args, **kwargs):
        self._log(msg, logging.DEBUG, *args, **kwargs)

    def info(self, msg: str, *args, **kwargs):
        self._log(msg, logging.INFO, *args, **kwargs)

    def warning(self, msg: str, *args, **kwargs):
        self._log(msg, logging.WARNING, *args, **kwargs)

    def error(self, msg: str, *args, **kwargs):
        self._log(msg, logging.ERROR, *args, **kwargs)

    def critical(self, msg: str, *args, **kwargs):
        self._log(msg, logging.CRITICAL, *args, **kwargs)

    def exception(self, msg: str, *args, **kwargs):
        # Mapped to CRITICAL so it is always captured in the error list.
        self._log(msg, logging.CRITICAL, *args, **kwargs)

    def _log(self, msg: str, level: int, *args, **kwargs):
        stacklevel = self._get_stacklevel(**kwargs)
        msg = self._get_msg_execution(msg, **kwargs)
        msg = self._get_msg_context(msg, **kwargs)
        if level >= logging.ERROR:
            # Keep the record for the end-of-run summary; plain ERRORs are
            # then demoted to WARNING on the underlying logger.
            _add_error(self._name, msg, stacklevel, level, args)
            if level == logging.ERROR:
                level = logging.WARNING
        # 'execution' and 'context' are wrapper-only kwargs; strip them before
        # delegating to the standard logging API, which would reject them.
        keys_to_remove = ['execution', 'context']
        for key in keys_to_remove:
            if key in kwargs:
                del kwargs[key]
        self._logger.log(level, msg, *args, **self._change_stacklevel(**kwargs))

    def _change_stacklevel(self, **kwargs):
        stacklevel = self._get_stacklevel(**kwargs)
        return dict(kwargs, stacklevel=stacklevel)

    def _get_stacklevel(self, **kwargs):
        # Offset by the wrapper's own frames so %(pathname)s / %(lineno)d
        # point at the real call site instead of this module.
        dict_kwargs = dict(kwargs)
        stacklevel = 3
        if 'stacklevel' in dict_kwargs:
            stacklevel = 2 + dict_kwargs['stacklevel']
        return stacklevel

    def _get_msg_context(self, msg: str, **kwargs):
        if 'context' in kwargs:
            context = kwargs['context']
            msg = f'[Context: {context}] {msg}'
        return msg

    def _get_msg_execution(self, msg: str, **kwargs):
        if 'execution' in kwargs:
            execution: Execution = kwargs['execution']
            msg = f'[Execution: {execution.source.source_name} -> {execution.destination.destination_name}] {msg}'
        return msg

def getLogger(name: Optional[str] = None):
    # Alias matching the stdlib spelling, so modules that switch to
    # 'from config import logging' keep their getLogger(...) call sites.
    return get_logger(name)

def get_logger(name: Optional[str] = None):
    return _LogWrapper(name)
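A minimal usage sketch of the wrapper (the logger name and the Execution value are hypothetical; 'execution' and 'context' are wrapper-only kwargs that become message prefixes):

from config import logging

logger = logging.get_logger('megalista.MyUploader')  # hypothetical name
logger.info('Starting upload')
# 'execution' is a models.execution.Execution instance (hypothetical here).
# The ERROR is recorded for the end-of-run summary, emitted as WARNING, and
# prefixed as "[Context: batch 3] [Execution: source -> destination] ...".
logger.error('Upload failed', execution=execution, context='batch 3')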

_error_list: List[logging.LogRecord] = []

def _add_error(name: str, msg: str, stacklevel: int, level: int, args):
    fn, lno, func, sinfo = _get_stack_trace(stacklevel)
    _error_list.append(logging.LogRecord(name, level, fn, lno, msg, args, None, func, sinfo))

def _get_stack_trace(stacklevel: int, stack_info: bool = True):
    # Adapted from the python logging module (Logger.findCaller): walk up
    # the stack past the wrapper's frames to locate the real caller.
    f: Optional[FrameType] = sys._getframe(3)
    if f is not None:
        f = f.f_back
    orig_f = f
    while f and stacklevel > 1:
        f = f.f_back
        stacklevel -= 1
    if not f:
        f = orig_f
    rv: Tuple[str, int, str, Optional[str]] = ("(unknown file)", 0, "(unknown function)", None)
    if f is not None and hasattr(f, "f_code"):
        co = f.f_code
        sinfo = None
        if stack_info:
            sio = io.StringIO()
            sio.write('Stack (most recent call last):\n')
            traceback.print_stack(f, file=sio)
            sinfo = sio.getvalue()
            if sinfo[-1] == '\n':
                sinfo = sinfo[:-1]
            sio.close()
        rv = (co.co_filename, f.f_lineno, co.co_name, sinfo)
    return rv

def has_errors() -> bool:
    return len(_error_list) > 0

def error_list() -> List[logging.LogRecord]:
    return _error_list

def get_formatted_error_list() -> Optional[str]:
    records = _error_list
    if records is not None and len(records) > 0:
        message = ''
        for i, rec in enumerate(records, start=1):
            message += f'{i}. {rec.msg}\n... in {rec.pathname}:{rec.lineno}\n'
        return message
    else:
        return None
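A sketch of surfacing the collected errors once a run finishes (the driver code here is hypothetical):

import sys
from config import logging as megalista_logging

# ... pipeline runs; uploaders log through megalista_logging.get_logger(...) ...

if megalista_logging.has_errors():
    # One numbered entry per recorded ERROR/CRITICAL:
    # "1. <message>\n... in <file>:<line>\n"
    sys.stderr.write(megalista_logging.get_formatted_error_list() or '')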

def null_filter(el: Any) -> Any:
    # Pass-through that logs the element, e.g. as a Map step in a pipeline.
    get_logger('megalista.LOG').info(f'Logging: {el}')
    return el
9 changes: 8 additions & 1 deletion megalista_dataflow/data_sources/base_data_source.py
@@ -30,4 +30,11 @@ def retrieve_data(self, executions: ExecutionsGroupedBySource) -> List[DataRowsG
        raise NotImplementedError("Source Type not implemented. Please check your configuration (sheet / json / firestore).")

    def write_transactional_info(self, rows, execution):
        raise NotImplementedError("Source Type not implemented. Please check your configuration (sheet / json / firestore).")

    @staticmethod
    def _convert_row_to_dict(row):
        # Copy the row's key/value pairs into a plain dict.
        row_dict = {}
        for key, value in row.items():
            row_dict[key] = value
        return row_dict
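The helper simply materializes a Mapping-like row (e.g. a BigQuery Row exposing .items()) into a plain dict; an illustrative check (the class name BaseDataSource is assumed from the file name):

row = {'email': 'user@example.com', 'gclid': 'abc123'}  # stand-in for a source row
assert BaseDataSource._convert_row_to_dict(row) == row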
@@ -16,7 +16,7 @@
from string import Template

import apache_beam as beam
from config import logging
from google.cloud import bigquery
from google.cloud.bigquery import SchemaField, Client
from apache_beam.io.gcp.bigquery import ReadFromBigQueryRequest
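This recurring import swap routes module-level logging through the project wrapper while keeping call sites unchanged. A hypothetical before/after sketch (logger name illustrative):

# before: stdlib logging; an ERROR only reaches the configured handlers
import logging
logging.getLogger('megalista.BigQueryDataSource').error('query failed')

# after: same call shape, but config.logging records the error for the
# end-of-run summary and demotes it to WARNING on the underlying logger
from config import logging
logging.getLogger('megalista.BigQueryDataSource').error('query failed')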
@@ -13,15 +13,14 @@
# limitations under the License.

import datetime
import pytest

from models.execution import AccountConfig, ExecutionsGroupedBySource
from models.execution import Destination
from models.execution import DestinationType
from models.execution import Execution
from models.execution import Source
from models.execution import SourceType
from models.execution import Batch

from models.execution import TransactionalType

5 changes: 2 additions & 3 deletions megalista_dataflow/data_sources/data_schemas.py
@@ -16,7 +16,6 @@
from configparser import MissingSectionHeaderError
from typing import List, Dict, Any
from models.execution import Destination, DestinationType, Execution, Batch
import functools
import pandas as pd
import ast
@@ -104,9 +103,9 @@
         'required': False, 'data_type': 'string'},
        {'name': 'mailing_address_last_name',
         'required': False, 'data_type': 'string'},
        {'name': 'mailing_address_country',
         'required': False, 'data_type': 'string'},
        {'name': 'mailing_address_zip',
         'required': False, 'data_type': 'string'}
    ],
    'groups': []
2 changes: 1 addition & 1 deletion megalista_dataflow/data_sources/file/file_data_source.py
@@ -25,7 +25,7 @@
from apache_beam.typehints.decorators import with_output_types
import numpy as np

from config import logging

from models.execution import SourceType, DestinationType, Execution, Batch, TransactionalType, ExecutionsGroupedBySource, DataRowsGroupedBySource
from models.options import DataflowOptions
2 changes: 1 addition & 1 deletion megalista_dataflow/data_sources/file/file_provider.py
@@ -20,7 +20,7 @@
"""

import io
from config import logging
from os.path import exists
from urllib.parse import ParseResultBytes

4 changes: 2 additions & 2 deletions megalista_dataflow/error/error_handling.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
from config import logging
from email.mime.text import MIMEText
from typing import Iterable, Optional, Dict

@@ -163,7 +163,7 @@ def add_error(self, execution: Execution, error_message: str):

        if execution.destination.destination_type != self._destination_type:
            raise ValueError(
                f'Received an error of destination type: {execution.destination.destination_type}'
                f' but this error handler is initialized with {self._destination_type} destination type')

        error = Error(execution, error_message)
54 changes: 0 additions & 54 deletions megalista_dataflow/error/logging_handler.py

This file was deleted.

39 changes: 0 additions & 39 deletions megalista_dataflow/error/logging_handler_test.py

This file was deleted.
