|
26 | 26 |
|
27 | 27 | # not sure why but mypy complains on missing `storage` but it is clearly there and is importable
|
28 | 28 | from google.cloud import storage # type: ignore[attr-defined]
|
| 29 | +from packaging.version import Version |
29 | 30 |
|
30 | 31 | from airflow.configuration import conf
|
31 | 32 | from airflow.exceptions import AirflowNotFoundException
|
|
47 | 48 | logger = logging.getLogger(__name__)
|
48 | 49 |
|
49 | 50 |
|
| 51 | +def get_default_delete_local_copy(): |
| 52 | + """Load delete_local_logs conf if Airflow version > 2.6 and return False if not. |
| 53 | +
|
| 54 | + TODO: delete this function when min airflow version >= 2.6. |
| 55 | + """ |
| 56 | + from airflow.version import version |
| 57 | + |
| 58 | + if Version(version) < Version("2.6"): |
| 59 | + return False |
| 60 | + return conf.getboolean("logging", "delete_local_logs") |
| 61 | + |
| 62 | + |
50 | 63 | class GCSTaskHandler(FileTaskHandler, LoggingMixin):
|
51 | 64 | """
|
52 | 65 | GCSTaskHandler is a python log handler that handles and reads task instance logs.
|
@@ -95,8 +108,8 @@ def __init__(
|
95 | 108 | self.gcp_keyfile_dict = gcp_keyfile_dict
|
96 | 109 | self.scopes = gcp_scopes
|
97 | 110 | self.project_id = project_id
|
98 |
| - self.delete_local_copy = kwargs.get( |
99 |
| - "delete_local_copy", conf.getboolean("logging", "delete_local_logs") |
| 111 | + self.delete_local_copy = ( |
| 112 | + kwargs["delete_local_copy"] if "delete_local_copy" in kwargs else get_default_delete_local_copy() |
100 | 113 | )
|
101 | 114 |
|
102 | 115 | @cached_property
|
@@ -205,6 +218,30 @@ def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[list[str], l
|
205 | 218 | messages.append(f"Unable to read remote log {e}")
|
206 | 219 | return messages, logs
|
207 | 220 |
|
| 221 | + def _read(self, ti, try_number, metadata=None): |
| 222 | + """ |
| 223 | + Read logs of given task instance and try_number from GCS. |
| 224 | +
|
| 225 | + If failed, read the log from task instance host machine. |
| 226 | +
|
| 227 | + todo: when min airflow version >= 2.6, remove this method |
| 228 | +
|
| 229 | + :param ti: task instance object |
| 230 | + :param try_number: task instance try_number to read logs from |
| 231 | + :param metadata: log metadata, |
| 232 | + can be used for steaming log reading and auto-tailing. |
| 233 | + """ |
| 234 | + if hasattr(super(), "_read_remote_logs"): |
| 235 | + # from Airflow 2.6, we don't implement the `_read` method. |
| 236 | + # if parent has _read_remote_logs, we're >= 2.6 |
| 237 | + return super()._read(ti, try_number, metadata) |
| 238 | + |
| 239 | + messages, logs = self._read_remote_logs(ti, try_number, metadata) |
| 240 | + if not logs: |
| 241 | + return super()._read(ti, try_number, metadata) |
| 242 | + |
| 243 | + return "".join([f"*** {x}\n" for x in messages]) + "\n".join(logs), {"end_of_log": True} |
| 244 | + |
208 | 245 | def gcs_write(self, log, remote_log_location) -> bool:
|
209 | 246 | """
|
210 | 247 | Write the log to the remote location and return `True`; fail silently and return `False` on error.
|
|
0 commit comments