forked from smarnach/pyexiftool
-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathexiftool.py
1308 lines (928 loc) · 52.2 KB
/
exiftool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
#
# This file is part of PyExifTool.
#
# PyExifTool <http://github.com/sylikc/pyexiftool>
#
# Copyright 2019-2023 Kevin M (sylikc)
# Copyright 2012-2014 Sven Marnach
#
# Community contributors are listed in the CHANGELOG.md for the PRs
#
# PyExifTool is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the licence, or
# (at your option) any later version, or the BSD licence.
#
# PyExifTool is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING.GPL or COPYING.BSD for more details.
"""
This submodule contains the core ``ExifTool`` class of PyExifTool
.. note::
:py:class:`exiftool.exiftool.ExifTool` class of this submodule is available in the ``exiftool`` namespace as :py:class:`exiftool.ExifTool`
"""
# ---------- standard Python imports ----------
import select
import subprocess
import os
import shutil
from pathlib import Path # requires Python 3.4+
import random
import locale
import warnings
import json # NOTE: to use other json libraries (simplejson/ujson/orjson/...), see :py:meth:`set_json_loads()`
# for the pdeathsig
import signal
import ctypes
# ---------- Typing Imports ----------
# for static analysis / type checking - Python 3.5+
from collections.abc import Callable
from typing import Optional, List, Union
# ---------- Library Package Imports ----------
from . import constants
from .exceptions import ExifToolVersionError, ExifToolRunning, ExifToolNotRunning, ExifToolOutputEmptyError, ExifToolJSONInvalidError
# ======================================================================================================================
# Codec names are defined once as named constants so they are not retyped as
# string literals (and possibly misspelled) where they are needed.
ENCODING_UTF8: str = "utf-8"
#ENCODING_LATIN1: str = "latin-1"
# ======================================================================================================================
def _set_pdeathsig(sig) -> Optional[Callable]:
    """
    Build a ``preexec_fn`` callable for ``subprocess.Popen()`` which requests
    that signal *sig* be sent to the exiftool child process when this
    (parent) process dies.

    Intended use: ``subprocess.Popen(..., preexec_fn=_set_pdeathsig(sig))``.

    :param sig: signal number handed to ``prctl(PR_SET_PDEATHSIG, sig)``
    :return: a zero-argument callable on Linux, otherwise ``None``
        (the mechanism only works on Linux)
    """
    if not constants.PLATFORM_LINUX:
        return None  # pragma: no cover

    def callable_method():
        # libc is loaded and prctl() is invoked only when the callable itself runs
        return ctypes.CDLL("libc.so.6").prctl(constants.PR_SET_PDEATHSIG, sig)

    return callable_method
# ======================================================================================================================
def _get_buffer_end(buffer_list: List[bytes], bytes_needed: int) -> bytes:
    """
    Return the last *bytes_needed* bytes of the data held in *buffer_list*.

    The result equals ``b"".join(buffer_list)[-bytes_needed:]`` for any
    ``bytes_needed >= 1``, but only the chunks at the end of the list are
    touched, so the whole list never has to be concatenated.

    :param buffer_list: chunks of data, oldest first
    :param bytes_needed: number of trailing bytes wanted
    :return: the trailing bytes (fewer than requested if the list holds less);
        ``b""`` when ``bytes_needed`` is less than 1
    """
    if bytes_needed < 1:
        return b""  # pragma: no cover

    still_missing = bytes_needed
    newest_first = []

    # walk backwards from the newest chunk until enough bytes are collected
    position = len(buffer_list) - 1
    while position >= 0 and still_missing > 0:
        piece = buffer_list[position][-still_missing:]
        newest_first.append(piece)
        still_missing -= len(piece)
        position -= 1

    # pieces were gathered newest-first; restore stream order before joining
    return b"".join(newest_first[::-1])
def _read_fd_endswith(fd, b_endswith: bytes, block_size: int) -> bytes:
    """
    Read from file descriptor *fd* until the data read so far ends with
    *b_endswith* (ignoring surrounding whitespace at the tail).

    This is a consolidated, platform independent read function.

    .. note::
        On Windows the read is a plain blocking ``os.read()``; if the marker
        never arrives and the descriptor stays open, this call blocks.

    .. note::
        If *fd* reaches end-of-file before the marker is seen (``os.read()``
        returns an empty bytes object), reading stops and whatever was
        received so far is returned.  The result then does NOT end with
        *b_endswith*; callers that need to tell the cases apart must check
        the tail themselves.

    :param fd: file descriptor to read from
    :param b_endswith: byte sequence marking the end of the expected output
    :param block_size: maximum number of bytes requested per ``os.read()``
    :return: everything that was read, joined into one bytes object
    """
    output_list: List[bytes] = []

    # Only the tail of the data is inspected: the marker itself plus a little
    # slack for trailing whitespace (4 covers \r\n with room to spare).  The
    # slack can be larger, but if it is too small, trailing whitespace could
    # push the marker out of the inspected window and it would be missed.
    endswith_count = len(b_endswith) + 4

    # slicing the tail first and stripping afterwards avoids touching the
    # whole buffer on every pass
    while not _get_buffer_end(output_list, endswith_count).strip().endswith(b_endswith):
        if constants.PLATFORM_WINDOWS:
            # windows does not support select() for anything except sockets
            # https://docs.python.org/3.7/library/select.html
            chunk = os.read(fd, block_size)
        else:  # pytest-cov:windows: no cover
            # this does NOT work on windows... and it may not work on other
            # systems... in that case, use the plain os.read() path above
            inputready, _outputready, _exceptready = select.select([fd], [], [])
            if fd not in inputready:
                continue
            chunk = os.read(fd, block_size)

        if not chunk:
            # os.read() returns b"" at end-of-file.  No more data will ever
            # arrive, so looping again would spin forever (and previously grew
            # the list with empty chunks without bound).
            break

        output_list.append(chunk)

    return b"".join(output_list)
# ======================================================================================================================
class ExifTool(object):
"""Run the `exiftool` command-line tool and communicate with it.
Use ``common_args`` to enable/disable print conversion by specifying/omitting ``-n``, respectively.
This determines whether exiftool should perform print conversion,
which prints values in a human-readable way but
may be slower. If print conversion is enabled, appending ``#`` to a tag
name disables the print conversion for this particular tag.
See `Exiftool print conversion FAQ`_ for more details.
.. _Exiftool print conversion FAQ: https://exiftool.org/faq.html#Q6
Some methods of this class are only available after calling
:py:meth:`run()`, which will actually launch the *exiftool* subprocess.
To avoid leaving the subprocess running, make sure to call
:py:meth:`terminate()` method when finished using the instance.
This method will also be implicitly called when the instance is
garbage collected, but there are circumstances when this won't ever
happen, so you should not rely on the implicit process
termination. Subprocesses won't be automatically terminated if
the parent process exits, so a leaked subprocess will stay around
until manually killed.
A convenient way to make sure that the subprocess is terminated is
to use the :py:class:`ExifTool` instance as a context manager::
with ExifTool() as et:
...
.. warning::
Note that options and parameters are not checked. There is no error handling or validation of options passed to *exiftool*.
Nonsensical options are mostly silently ignored by exiftool, so there's not
much that can be done in that regard. You should avoid passing
non-existent files to any of the methods, since this will lead
to undefined behaviour.
"""
##############################################################################
#################################### INIT ####################################
##############################################################################
# ----------------------------------------------------------------------------------------------------------------------
def __init__(self,
             executable: Optional[str] = None,
             common_args: Optional[List[str]] = ["-G", "-n"],
             win_shell: bool = False,
             config_file: Optional[Union[str, Path]] = None,
             encoding: Optional[str] = None,
             logger = None) -> None:
    """
    Set up the instance.  The *exiftool* subprocess is NOT started here; call :py:meth:`run()` (or use the instance as a context manager) to launch it.

    :param executable: Specify file name of the *exiftool* executable if it is in your ``PATH``.  Otherwise, specify the full path to the ``exiftool`` executable.

        Passed directly into :py:attr:`executable` property.

        .. note::
            The default value :py:attr:`exiftool.constants.DEFAULT_EXECUTABLE` will only work if the executable is in your ``PATH``.

    :type executable: str, or None to use default

    :param common_args:
        Pass in additional parameters for the stay-open instance of exiftool.

        Defaults to ``["-G", "-n"]`` as this is the most common use case.

        * ``-G`` (groupName level 1 enabled) separates the output with *groupName:tag* to disambiguate same-named tags under different groups.
        * ``-n`` (print conversion disabled) improves the speed and consistency of output, and is more machine-parsable

        A copy of the list is passed into :py:attr:`common_args` property, so the instance never shares
        its list with the caller or with the default value of this parameter.

        .. note::
            Depending on your use case, there may be other useful grouping levels and options.  Search `Phil Harvey's exiftool documentation`_ for **groupNames** and **groupHeadings** to get more info.

        .. _`Phil Harvey's exiftool documentation`: https://exiftool.org/exiftool_pod.html

    :type common_args: list of str, or None.

    :param bool win_shell: (Windows only) Minimizes the exiftool process.

        .. note::
            This parameter may be deprecated in the future

    :param config_file:
        File path to ``-config`` parameter when starting exiftool process.

        Passed directly into :py:attr:`config_file` property.

    :type config_file: str, Path, or None

    :param encoding: Specify encoding to be used when communicating with
        exiftool process.  By default, will use ``locale.getpreferredencoding()``

        Passed directly into :py:attr:`encoding` property.

    :param logger: Set a custom logger to log status and debug messages to.

        Passed directly into :py:attr:`logger` property.

    :raises FileNotFoundError: Propagated from the :py:attr:`executable` / :py:attr:`config_file` setters
    :raises TypeError: Propagated from the :py:attr:`common_args` / :py:attr:`logger` setters
    """

    # --- default settings / declare member variables ---
    self._running: bool = False  # is it running?
    """A Boolean value indicating whether this instance is currently
    associated with a running subprocess."""
    self._win_shell: bool = win_shell  # do you want to see the shell on Windows?

    self._process = None  # this is set to the process to interact with when _running=True
    self._ver: Optional[str] = None  # this is set to be the exiftool -v -ver when running

    # str or bytes, matching what the last_stdout / last_stderr properties are declared to return
    self._last_stdout: Optional[Union[str, bytes]] = None  # previous output
    self._last_stderr: Optional[Union[str, bytes]] = None  # previous stderr
    self._last_status: Optional[int] = None  # previous exit status from exiftool (look up EXIT STATUS in exiftool documentation for more information)

    self._block_size: int = constants.DEFAULT_BLOCK_SIZE  # set to default block size

    # these are set via properties
    self._executable: Union[str, Path] = constants.DEFAULT_EXECUTABLE  # executable absolute path (default set to just the executable name, so it can't be None)
    self._config_file: Optional[str] = None  # config file that can only be set when exiftool is not running
    self._common_args: Optional[List[str]] = None
    self._logger = None
    self._encoding: Optional[str] = None
    self._json_loads: Callable = json.loads  # variable points to the actual callable method
    self._json_loads_kwargs: dict = {}  # default optional params to pass into json.loads() call

    # --- run external library initialization code ---
    # NOTE(review): this reseeds the process-wide ``random`` generator on every
    # instantiation; left in place so existing behavior is unchanged.
    random.seed(None)  # initialize random number generator

    # --- set variables via properties (which do the error checking) --

    # set first, so that debug and info messages get logged
    self.logger = logger

    # use the passed in parameter, or the default if not set
    # error checking is done in the property.setter
    self.executable = executable or constants.DEFAULT_EXECUTABLE
    self.encoding = encoding

    # The setter stores the list object it is given.  The default value of
    # ``common_args`` is one list object shared by every call, so storing it
    # directly would let a mutation on one instance (et.common_args.append(...))
    # leak into all instances created afterwards.  Hand over a copy instead.
    # Non-list values go through untouched so the setter still raises TypeError.
    self.common_args = list(common_args) if isinstance(common_args, list) else common_args

    # set the property, error checking happens in the property.setter
    self.config_file = config_file
#######################################################################################
#################################### MAGIC METHODS ####################################
#######################################################################################
# ----------------------------------------------------------------------------------------------------------------------
def __enter__(self):
    """Context manager entry: call :py:meth:`run()` and return this instance as the ``with`` target."""
    self.run()
    return self
# ----------------------------------------------------------------------------------------------------------------------
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
if self.running:
self.terminate()
# ----------------------------------------------------------------------------------------------------------------------
def __del__(self) -> None:
if self.running:
# indicate that __del__ has been started - allows running alternate code path in terminate()
self.terminate(_del=True)
########################################################################################
#################################### PROPERTIES R/w ####################################
########################################################################################
# ----------------------------------------------------------------------------------------------------------------------
@property
def executable(self) -> Union[str, Path]:
"""
Path to *exiftool* executable.
:getter: Returns current exiftool path
:setter: Specify just the executable name, or an absolute path to the executable.
If path given is not absolute, searches environment ``PATH``.
.. note::
Setting is only available when exiftool process is not running.
:raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
:type: str, Path
"""
return self._executable
@executable.setter
def executable(self, new_executable: Union[str, Path]) -> None:
# cannot set executable when process is running
if self.running:
raise ExifToolRunning("Cannot set new executable")
abs_path: Optional[str] = None
# in testing, shutil.which() will work if a complete path is given,
# but this isn't clear from documentation, so we explicitly check and
# don't search if path exists
if Path(new_executable).exists():
abs_path = new_executable
else:
# Python 3.3+ required
abs_path = shutil.which(new_executable)
if abs_path is None:
raise FileNotFoundError(f'"{new_executable}" is not found, on path or as absolute path')
# absolute path is returned
self._executable = str(abs_path)
if self._logger: self._logger.info(f"Property 'executable': set to \"{abs_path}\"")
# ----------------------------------------------------------------------------------------------------------------------
@property
def encoding(self) -> Optional[str]:
"""
Encoding of Popen() communication with *exiftool* process.
:getter: Returns current encoding setting
:setter: Set a new encoding.
* If *new_encoding* is None, will detect it from ``locale.getpreferredencoding(do_setlocale=False)`` (do_setlocale is set to False as not to affect the caller).
* Default to ``utf-8`` if nothing is returned by ``getpreferredencoding``
.. warning::
Property setter does NOT validate the encoding for validity. It is passed verbatim into subprocess.Popen()
.. note::
Setting is only available when exiftool process is not running.
:raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
"""
return self._encoding
@encoding.setter
def encoding(self, new_encoding: Optional[str]) -> None:
# cannot set encoding when process is running
if self.running:
raise ExifToolRunning("Cannot set new encoding")
# auto-detect system specific
self._encoding = new_encoding or (locale.getpreferredencoding(do_setlocale=False) or ENCODING_UTF8)
# ----------------------------------------------------------------------------------------------------------------------
@property
def block_size(self) -> int:
"""
Block size for communicating with *exiftool* subprocess. Used when reading from the I/O pipe.
:getter: Returns current block size
:setter: Set a new block_size. Does basic error checking to make sure > 0.
:raises ValueError: If new block size is invalid
:type: int
"""
return self._block_size
@block_size.setter
def block_size(self, new_block_size: int) -> None:
if new_block_size <= 0:
raise ValueError("Block Size doesn't make sense to be <= 0")
self._block_size = new_block_size
if self._logger: self._logger.info(f"Property 'block_size': set to \"{new_block_size}\"")
# ----------------------------------------------------------------------------------------------------------------------
@property
def common_args(self) -> Optional[List[str]]:
"""
Common Arguments executed with every command passed to *exiftool* subprocess
This is the parameter `-common_args`_ that is passed when the *exiftool* process is STARTED
Read `Phil Harvey's ExifTool documentation`_ to get further information on what options are available / how to use them.
.. _-common_args: https://exiftool.org/exiftool_pod.html#Advanced-options
.. _Phil Harvey's ExifTool documentation: https://exiftool.org/exiftool_pod.html
:getter: Returns current common_args list
:setter: If ``None`` is passed in, sets common_args to ``[]``. Otherwise, sets the given list without any validation.
.. warning::
No validation is done on the arguments list. It is passed verbatim to *exiftool*. Invalid options or combinations may result in undefined behavior.
.. note::
Setting is only available when exiftool process is not running.
:raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
:raises TypeError: If setting is not a list
:type: list[str], None
"""
return self._common_args
@common_args.setter
def common_args(self, new_args: Optional[List[str]]) -> None:
if self.running:
raise ExifToolRunning("Cannot set new common_args")
if new_args is None:
self._common_args = []
elif isinstance(new_args, list):
# default parameters to exiftool
# -n = disable print conversion (speedup)
self._common_args = new_args
else:
raise TypeError("common_args not a list of strings")
if self._logger: self._logger.info(f"Property 'common_args': set to \"{self._common_args}\"")
# ----------------------------------------------------------------------------------------------------------------------
@property
def config_file(self) -> Optional[Union[str, Path]]:
"""
Path to config file.
See `ExifTool documentation for -config`_ for more details.
:getter: Returns current config file path, or None if not set
:setter: File existence is checked when setting parameter
* Set to ``None`` to disable the ``-config`` parameter when starting *exiftool*
* Set to ``""`` has special meaning and disables loading of the default config file. See `ExifTool documentation for -config`_ for more info.
.. note::
Currently file is checked for existence when setting. It is not checked when starting process.
:raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
:type: str, Path, None
.. _ExifTool documentation for -config: https://exiftool.org/exiftool_pod.html#Advanced-options
"""
return self._config_file
@config_file.setter
def config_file(self, new_config_file: Optional[Union[str, Path]]) -> None:
if self.running:
raise ExifToolRunning("Cannot set a new config_file")
if new_config_file is None:
self._config_file = None
elif new_config_file == "":
# this is VALID usage of -config parameter
# As per exiftool documentation: Loading of the default config file may be disabled by setting CFGFILE to an empty string (ie. "")
self._config_file = ""
elif not Path(new_config_file).exists():
raise FileNotFoundError("The config file could not be found")
else:
self._config_file = str(new_config_file)
if self._logger: self._logger.info(f"Property 'config_file': set to \"{self._config_file}\"")
##############################################################################################
#################################### PROPERTIES Read only ####################################
##############################################################################################
# ----------------------------------------------------------------------------------------------------------------------
@property
def running(self) -> bool:
"""
Read-only property which indicates whether the *exiftool* subprocess is running or not.
:getter: Returns current running state
.. note::
This checks to make sure the process is still alive.
If the process has died since last `running` detection, this property
will detect the state change and reset the status accordingly.
"""
if self._running:
# check if the process is actually alive
if self._process.poll() is not None:
# process died
warnings.warn("ExifTool process was previously running but died")
self._flag_running_false()
if self._logger: self._logger.warning("Property 'running': ExifTool process was previously running but died")
return self._running
# ----------------------------------------------------------------------------------------------------------------------
@property
def version(self) -> str:
"""
Read-only property which is the string returned by ``exiftool -ver``
The *-ver* command is ran once at process startup and cached.
:getter: Returns cached output of ``exiftool -ver``
:raises ExifToolNotRunning: If attempting to read while not running (:py:attr:`running` == False)
"""
if not self.running:
raise ExifToolNotRunning("Can't get ExifTool version")
return self._ver
# ----------------------------------------------------------------------------------------------------------------------
@property
def last_stdout(self) -> Optional[Union[str, bytes]]:
    """
    ``STDOUT`` for most recent result from execute()

    .. note::
        The return type can be either ``str`` or ``bytes``.

        If the most recent call to execute() was made with ``raw_bytes=True``, then this will return ``bytes``.  Otherwise this will be ``str``.

    .. note::
        This property can be read at any time, and is not dependent on running state of ExifTool.

        It is INTENTIONALLY *NOT* CLEARED on exiftool termination, to allow
        for executing a command and terminating, but still having the result available.
    """
    return self._last_stdout
# ----------------------------------------------------------------------------------------------------------------------
@property
def last_stderr(self) -> Optional[Union[str, bytes]]:
    """
    ``STDERR`` for most recent result from execute()

    .. note::
        The return type can be either ``str`` or ``bytes``.

        If the most recent call to execute() was made with ``raw_bytes=True``, then this will return ``bytes``.  Otherwise this will be ``str``.

    .. note::
        This property can be read at any time, and is not dependent on running state of ExifTool.

        It is INTENTIONALLY *NOT* CLEARED on exiftool termination, to allow
        for executing a command and terminating, but still having the result available.
    """
    return self._last_stderr
# ----------------------------------------------------------------------------------------------------------------------
@property
def last_status(self) -> Optional[int]:
    """
    ``Exit Status Code`` for most recent result from execute()

    ``None`` until a value has been stored; it is initialized to ``None`` in the constructor.

    .. note::
        This property can be read at any time, and is not dependent on running state of ExifTool.

        It is INTENTIONALLY *NOT* CLEARED on exiftool termination, to allow
        for executing a command and terminating, but still having the result available.
    """
    return self._last_status
###############################################################################################
#################################### PROPERTIES Write only ####################################
###############################################################################################
# ----------------------------------------------------------------------------------------------------------------------
def _set_logger(self, new_logger) -> None:
""" set a new user-created logging.Logger object
can be set at any time to start logging.
Set to None at any time to stop logging.
"""
if new_logger is None:
self._logger = None
return
# can't check this in case someone passes a drop-in replacement, like loguru, which isn't type logging.Logger
#elif not isinstance(new_logger, logging.Logger):
# raise TypeError("logger needs to be of type logging.Logger")
# do some basic checks on methods available in the "logger" provided
check = True
try:
# ExifTool will probably use all of these logging method calls at some point
# check all these are callable methods
check = callable(new_logger.info) and \
callable(new_logger.warning) and \
callable(new_logger.error) and \
callable(new_logger.critical) and \
callable(new_logger.exception)
except AttributeError:
check = False
if not check:
raise TypeError("logger needs to implement methods (info,warning,error,critical,exception)")
self._logger = new_logger
# A write-only property (setter without a getter) cannot be written with the
# @property decorator, so it is built here at class level by calling
# property() with only ``fset``.
# https://stackoverflow.com/questions/17576009/python-class-property-use-setter-but-evade-getter
# https://docs.python.org/3/howto/descriptor.html#properties
# The property name does not have to match the name of the setter method.
logger = property(fset=_set_logger, doc="""Write-only property to set the class of logging.Logger""")
"""
Write-only property to set the class of logging.Logger
If this is set, then status messages will log out to the given class.
.. note::
This can be set and unset (set to ``None``) at any time, regardless of whether the subprocess is running (:py:attr:`running` == True) or not.
:setter: Specify an object to log to. The class is not checked, but validation is done to ensure the object has callable methods ``info``, ``warning``, ``error``, ``critical``, ``exception``.
:raises TypeError: If object does not contain one or more of the required methods, or contains those attributes but one or more of them are not callable.
:type: Object
"""
#########################################################################################
##################################### SETTER METHODS ####################################
#########################################################################################
# ----------------------------------------------------------------------------------------------------------------------
def set_json_loads(self, json_loads, **kwargs) -> None:
"""
**Advanced**: Override default built-in ``json.loads()`` method. The method is only used once in :py:meth:`execute_json`
This allows using a different json string parser.
(Alternate json libraries typically provide faster speed than the
built-in implementation, more supported features, and/or different behavior.)
Examples of json libraries: `orjson`_, `rapidjson`_, `ujson`_, ...
.. note::
This method is designed to be called the same way you would expect to call the provided ``json_loads`` method.
Include any ``kwargs`` you would in the call.
For example, to pass additional arguments to ``json.loads()``: ``set_json_loads(json.loads, parse_float=str)``
.. note::
This can be set at any time, regardless of whether the subprocess is running (:py:attr:`running` == True) or not.
.. warning::
This setter does not check whether the method provided actually parses json. Undefined behavior or crashes may occur if used incorrectly
This is **advanced configuration** for specific use cases only.
For an example use case, see the :ref:`FAQ <set_json_loads faq>`
:param json_loads: A callable method to replace built-in ``json.loads`` used in :py:meth:`execute_json`
:type json_loads: callable
:param kwargs: Parameters passed to the ``json_loads`` method call
:raises TypeError: If ``json_loads`` is not callable
.. _orjson: https://pypi.org/project/orjson/
.. _rapidjson: https://pypi.org/project/python-rapidjson/
.. _ujson: https://pypi.org/project/ujson/
"""
if not callable(json_loads):
# not a callable method
raise TypeError
self._json_loads = json_loads
self._json_loads_kwargs = kwargs
#########################################################################################
#################################### PROCESS CONTROL ####################################
#########################################################################################
# ----------------------------------------------------------------------------------------------------------------------
def run(self) -> None:
    """Start an *exiftool* subprocess in batch mode.

    This method will issue a ``UserWarning`` if the subprocess is
    already running (:py:attr:`running` == True).  The process is started with :py:attr:`common_args` as common arguments,
    which are automatically included in every command you run with :py:meth:`execute()`.

    You can override these default arguments with the
    ``common_args`` parameter in the constructor or setting :py:attr:`common_args` before calling :py:meth:`run()`.

    .. note::
        If you have another executable named *exiftool* which isn't Phil Harvey's ExifTool, then you're shooting yourself in the foot as there's no error checking for that

    :raises FileNotFoundError: If *exiftool* is no longer found.  Re-raised from subprocess.Popen()
    :raises OSError: Re-raised from subprocess.Popen()
    :raises ValueError: Re-raised from subprocess.Popen()
    :raises subprocess.CalledProcessError: Re-raised from subprocess.Popen()
    :raises RuntimeError: Popen() launched process but it died right away
    :raises ExifToolVersionError: :py:attr:`exiftool.constants.EXIFTOOL_MINIMUM_VERSION` not met.  ExifTool process will be automatically terminated.
    """
    if self.running:
        warnings.warn("ExifTool already running; doing nothing.", UserWarning)
        return

    # first the executable ...
    # TODO should we check the executable for existence here?
    proc_args = [self._executable, ]

    # If working with a config file, it must be the first argument after the executable per: https://exiftool.org/config.html
    if self._config_file is not None:
        # must check explicitly for None, as "" is valid
        # TODO check that the config file exists here?
        proc_args.extend(["-config", self._config_file])

    # this is the required stuff for the stay_open that makes pyexiftool so great!
    proc_args.extend(["-stay_open", "True", "-@", "-"])

    # only if there are any common_args.  [] and None are skipped equally with this
    if self._common_args:
        proc_args.append("-common_args")  # add this param only if there are common_args
        proc_args.extend(self._common_args)  # add the common arguments

    # ---- set platform-specific kwargs for Popen ----
    kwargs: dict = {}

    if constants.PLATFORM_WINDOWS:
        # TODO: I don't think this code actually does anything ... I've never seen a console pop up on Windows
        # Perhaps need to specify subprocess.STARTF_USESHOWWINDOW to actually have any console pop up?
        # https://docs.python.org/3/library/subprocess.html#windows-popen-helpers
        # NOTE(review): SW_FORCEMINIMIZE is a show-window value but is OR-ed into dwFlags here
        # rather than assigned to wShowWindow -- left unchanged, confirm intended behavior on Windows.
        startup_info = subprocess.STARTUPINFO()
        if not self._win_shell:
            # Adding enum 11 (SW_FORCEMINIMIZE in win32api speak) will
            # keep it from throwing up a DOS shell when it launches.
            startup_info.dwFlags |= constants.SW_FORCEMINIMIZE

        kwargs["startupinfo"] = startup_info
    else:  # pytest-cov:windows: no cover
        # assume it's linux
        kwargs["preexec_fn"] = _set_pdeathsig(signal.SIGTERM)
        # Warning: The preexec_fn parameter is not safe to use in the presence of threads in your application.
        # https://docs.python.org/3/library/subprocess.html#subprocess.Popen

    # NOTE: the encoding= parameter was removed from the Popen() call to support
    # using bytes in the actual communication with exiftool process.
    # Due to the way the code is written, ExifTool only uses stdin.write which would need to be in bytes.
    # The reading is _NOT_ using subprocess.communicate().  This class reads raw bytes using os.read()
    # Therefore, by switching off the encoding= in Popen(), we can support both bytes and str at the
    # same time.  (This change was to support https://github.com/sylikc/pyexiftool/issues/47)
    #
    # unify both platform calls into one subprocess.Popen call
    #
    # FileNotFoundError, OSError, ValueError and subprocess.CalledProcessError are
    # deliberately left to propagate unchanged to the caller (see :raises: above).
    # TODO print out more useful error messages to these different errors
    self._process = subprocess.Popen(
        proc_args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)

    # check error above before saying it's running
    if self._process.poll() is not None:
        # the Popen launched, then process terminated
        self._process = None  # unset it as it's now terminated
        raise RuntimeError("exiftool did not execute successfully")

    # have to set this before doing the checks below, or else execute() will fail
    self._running = True

    # get ExifTool version here and any Exiftool metadata
    # this can also verify that it is really ExifTool we ran, not some other random process
    try:
        # apparently because .execute() has code that already depends on v12.15+ functionality,
        # _parse_ver() will throw a ValueError immediately with:
        #   ValueError: invalid literal for int() with base 10: '${status}'
        self._ver = self._parse_ver()
    except ValueError as err:
        # trap the error and return it as a minimum version problem
        self.terminate()
        # chain explicitly so the original parse failure stays visible as the cause
        raise ExifToolVersionError(f"Error retrieving Exiftool info.  Is your Exiftool version ('exiftool -ver') >= required version ('{constants.EXIFTOOL_MINIMUM_VERSION}')?") from err

    if self._logger:
        self._logger.info(f"Method 'run': Exiftool version '{self._ver}' (pid {self._process.pid}) launched with args '{proc_args}'")

    # currently not needed... if it passes -ver check, the rest is OK
    # may use in the future again if another version feature is needed but the -ver check passes
    #
    # # check that the minimum required version is met, if not, terminate...
    # # if you run against a version which isn't supported, strange errors come up during execute()
    # if not self._exiftool_version_check():
    #     self.terminate()
    #     if self._logger: self._logger.error(f"Method 'run': Exiftool version '{self._ver}' did not meet the required minimum version '{constants.EXIFTOOL_MINIMUM_VERSION}'")
    #     raise ExifToolVersionError(f"exiftool version '{self._ver}' < required '{constants.EXIFTOOL_MINIMUM_VERSION}'")
# ----------------------------------------------------------------------------------------------------------------------
def terminate(self, timeout: int = 30, _del: bool = False) -> None:
    """Terminate the *exiftool* subprocess.

    If the subprocess isn't running, this method issues a ``UserWarning`` and does nothing else.

    Otherwise ``-stay_open False`` is sent to the process and it is given ``timeout`` to exit
    before being killed.  The one exception is the ``__del__`` path on Windows, where the process is killed outright.

    .. note::
        There is a bug in CPython 3.8+ on Windows where terminate() does not work during ``__del__()``

        See CPython issue `starting a thread in __del__ hangs at interpreter shutdown`_ for more info.

    :param timeout: Timeout handed to ``communicate()`` while waiting for the process to exit after ``-stay_open False`` is sent.
        Not used when ``_del`` is True on Windows.
    :param _del: Internal flag; when True on Windows the process is killed instead of being asked to exit
        (intended for the ``__del__()`` code path).

    .. _starting a thread in __del__ hangs at interpreter shutdown: https://github.com/python/cpython/issues/87950
    """
    if not self.running:
        warnings.warn("ExifTool not running; doing nothing.", UserWarning)
        return

    proc = self._process

    # _del is tested first on purpose: the platform constant is only consulted on the __del__ path
    graceful_exit = not (_del and constants.PLATFORM_WINDOWS)

    if graceful_exit:
        # On Windows, running this after __del__ freezes at communicate(), regardless of timeout
        #   see https://github.com/python/cpython/issues/87950 for details
        # On Linux, this runs as is, and the process terminates properly
        try:
            try:
                # this is a constant sequence specified by PH's exiftool
                proc.communicate(input=b"-stay_open\nFalse\n", timeout=timeout)
            except RuntimeError:
                # Python 3.12 throws a runtime error -- see https://github.com/python/cpython/pull/104826
                #   RuntimeError: can't create new thread at interpreter shutdown
                pass

            # always follow up with a kill, whether or not the process exited by itself
            proc.kill()
        except subprocess.TimeoutExpired:  # this is new in Python 3.3 (for python 2.x, use the PyPI subprocess32 module)
            # err handling code from https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate
            proc.kill()
            proc.communicate()
    else:
        # don't cleanly exit on windows, during __del__ as it'll freeze at communicate()
        proc.kill()
        proc.poll()

        try:
            # have to cleanup the process or else .poll() will return None
            # TODO freezes here on windows if subprocess zombie remains
            # TODO a bug filed with Python, or user error... this doesn't seem to work at all ... .communicate() still hangs
            # https://bugs.python.org/issue43784 , https://github.com/python/cpython/issues/87950... Windows-specific issue affecting Python 3.8-3.10 (as of this time)
            proc.communicate()
        except RuntimeError:
            # Python 3.12 throws a runtime error -- see https://github.com/python/cpython/pull/104826
            #   RuntimeError: can't create new thread at interpreter shutdown
            pass

    self._flag_running_false()

    # TODO log / return exit status from exiftool?
    if self._logger:
        self._logger.info("Method 'terminate': Exiftool terminated successfully.")
##################################################################################
#################################### EXECUTE* ####################################
##################################################################################
# ----------------------------------------------------------------------------------------------------------------------
def execute(self, *params: Union[str, bytes], raw_bytes: bool = False) -> Union[str, bytes]:
"""Execute the given batch of parameters with *exiftool*.
This method accepts any number of parameters and sends them to
the attached ``exiftool`` subprocess. The process must be
running, otherwise :py:exc:`exiftool.exceptions.ExifToolNotRunning` is raised. The final
``-execute`` necessary to actually run the batch is appended
automatically; see the documentation of :py:meth:`run()` for
the common options. The ``exiftool`` output is read up to the
end-of-output sentinel and returned as a ``str`` decoded
based on the currently set :py:attr:`encoding`,
excluding the sentinel.
The parameters must be of type ``str`` or ``bytes``.
``str`` parameters are encoded to bytes automatically using the :py:attr:`encoding` property.
For filenames, this should be the system's filesystem encoding.
``bytes`` parameters are untouched and passed directly to ``exiftool``.
.. note::
This is the core method to interface with the ``exiftool`` subprocess.
No processing is done on the input or output.
:param params: One or more parameters to send to the ``exiftool`` subprocess.
Typically passed in via `Unpacking Argument Lists`_
.. note::
The parameters to this function must be type ``str`` or ``bytes``.
:type params: one or more string/bytes parameters
:param raw_bytes: If True, returns bytes. Default behavior returns a str
:return:
* STDOUT is returned by the method call, and is also set in :py:attr:`last_stdout`
* STDERR is set in :py:attr:`last_stderr`
* Exit Status of the command is set in :py:attr:`last_status`
:raises ExifToolNotRunning: If attempting to execute when not running (:py:attr:`running` == False)
:raises ExifToolVersionError: If unexpected text was returned from the command while parsing out the sentinels
:raises UnicodeDecodeError: If the :py:attr:`encoding` is not specified properly, it may be possible for ``.decode()`` method to raise this error
:raises TypeError: If ``params`` argument is not ``str`` or ``bytes``
.. _Unpacking Argument Lists: https://docs.python.org/3/tutorial/controlflow.html#unpacking-argument-lists
"""
if not self.running:
raise ExifToolNotRunning("Cannot execute()")
# ---------- build the special params to execute ----------