diff --git a/pipcl.py b/pipcl.py index 89abc6425..c533c73ee 100644 --- a/pipcl.py +++ b/pipcl.py @@ -19,6 +19,7 @@ import base64 import codecs +import difflib import glob import hashlib import inspect @@ -605,7 +606,10 @@ def build_wheel(self, f' metadata_directory={metadata_directory!r}' ) - if sys.implementation.name == 'graalpy': + if os.environ.get('CIBUILDWHEEL') == '1': + # Don't special-case graal builds when running under cibuildwheel. + pass + elif sys.implementation.name == 'graalpy': # We build for Graal by building a native Python wheel with Graal # Python's include paths and library directory. We then rename the # wheel to contain graal's tag etc. @@ -1496,6 +1500,7 @@ def build_extension( name, path_i, outdir, + *, builddir=None, includes=None, defines=None, @@ -1507,6 +1512,7 @@ def build_extension( linker_extra='', swig=None, cpp=True, + source_extra=None, prerequisites_swig=None, prerequisites_compile=None, prerequisites_link=None, @@ -1559,6 +1565,8 @@ def build_extension( Swig command; if false we use 'swig'. cpp: If true we tell SWIG to generate C++ code instead of C. + source_extra: + Extra source files to build into the shared library, prerequisites_swig: prerequisites_compile: prerequisites_link: @@ -1593,7 +1601,7 @@ def build_extension( `compile_extra` (also `/I` on windows) and use them with swig so that it can see the same header files as C/C++. This is useful when using enviromment variables such as `CC` and `CXX` to set - `compile_extra. + `compile_extra`. py_limited_api: If true we build for current Python's limited API / stable ABI. 
@@ -1608,6 +1616,12 @@ def build_extension( builddir = outdir if not swig: swig = 'swig' + + if source_extra is None: + source_extra = list() + if isinstance(source_extra, str): + source_extra = [source_extra] + includes_text = _flags( includes, '-I') defines_text = _flags( defines, '-D') libpaths_text = _flags( libpaths, '/LIBPATH:', '"') if windows() else _flags( libpaths, '-L') @@ -1617,11 +1631,11 @@ def build_extension( os.makedirs( outdir, exist_ok=True) # Run SWIG. - + # if infer_swig_includes: # Extract include flags from `compiler_extra`. swig_includes_extra = '' - compiler_extra_items = compiler_extra.split() + compiler_extra_items = shlex.split(compiler_extra) i = 0 while i < len(compiler_extra_items): item = compiler_extra_items[i] @@ -1656,211 +1670,194 @@ def build_extension( prerequisites_swig2, ) - so_suffix = _so_suffix(use_so_versioning = not py_limited_api) + if pyodide(): + so_suffix = '.so' + log0(f'pyodide: PEP-3149 suffix untested, so omitting. {_so_suffix()=}.') + else: + so_suffix = _so_suffix(use_so_versioning = not py_limited_api) path_so_leaf = f'_{name}{so_suffix}' path_so = f'{outdir}/{path_so_leaf}' py_limited_api2 = current_py_limited_api() if py_limited_api else None - if windows(): - path_obj = f'{path_so}.obj' + compiler_command, pythonflags = base_compiler(cpp=cpp) + linker_command, _ = base_linker(cpp=cpp) + # setuptools on Linux seems to use slightly different compile flags: + # + # -fwrapv -O3 -Wall -O2 -g0 -DPY_CALL_TRAMPOLINE + # + general_flags = '' + if windows(): permissive = '/permissive-' EHsc = '/EHsc' T = '/Tp' if cpp else '/Tc' optimise2 = '/DNDEBUG /O2' if optimise else '/D_DEBUG' - debug2 = '' - if debug: - debug2 = '/Zi' # Generate .pdb. - # debug2 = '/Z7' # Embed debug info in .obj files. 
- + debug2 = '/Zi' if debug else '' py_limited_api3 = f'/DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else '' - # As of 2023-08-23, it looks like VS tools create slightly - # .dll's each time, even with identical inputs. - # - # Some info about this is at: - # https://nikhilism.com/post/2020/windows-deterministic-builds/. - # E.g. an undocumented linker flag `/Brepro`. - # - - command, pythonflags = base_compiler(cpp=cpp) - command = f''' - {command} - # General: - /c # Compiles without linking. - {EHsc} # Enable "Standard C++ exception handling". - - #/MD # Creates a multithreaded DLL using MSVCRT.lib. - {'/MDd' if debug else '/MD'} - - # Input/output files: - {T}{path_cpp} # /Tp specifies C++ source file. - /Fo{path_obj} # Output file. codespell:ignore - - # Include paths: - {includes_text} - {pythonflags.includes} # Include path for Python headers. - - # Code generation: - {optimise2} - {debug2} - {permissive} # Set standard-conformance mode. - - # Diagnostics: - #/FC # Display full path of source code files passed to cl.exe in diagnostic text. - /W3 # Sets which warning level to output. /W3 is IDE default. - /diagnostics:caret # Controls the format of diagnostic messages. - /nologo # + else: + if debug: + general_flags += '/Zi' if windows() else ' -g' + if optimise: + general_flags += ' /DNDEBUG /O2' if windows() else ' -O2 -DNDEBUG' - {defines_text} - {compiler_extra} + py_limited_api3 = f'-DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else '' - {py_limited_api3} - ''' - run_if( command, path_obj, path_cpp, prerequisites_compile) + if windows(): + pass + elif darwin(): + # MacOS's linker does not like `-z origin`. + rpath_flag = "-Wl,-rpath,@loader_path/" + # Avoid `Undefined symbols for ... "_PyArg_UnpackTuple" ...'. 
+ general_flags += ' -undefined dynamic_lookup' + elif pyodide(): + # Setting `-Wl,-rpath,'$ORIGIN',-z,origin` gives: + # emcc: warning: ignoring unsupported linker flag: `-rpath` [-Wlinkflags] + # wasm-ld: error: unknown -z value: origin + # + rpath_flag = "-Wl,-rpath,'$ORIGIN'" + else: + rpath_flag = "-Wl,-rpath,'$ORIGIN',-z,origin" + + # Fun fact - on Linux, if the -L and -l options are before '{path_cpp}' + # they seem to be ignored... + # + path_os = list() - command, pythonflags = base_linker(cpp=cpp) - debug2 = '/DEBUG' if debug else '' - base, _ = os.path.splitext(path_so_leaf) - command = f''' - {command} - /DLL # Builds a DLL. - /EXPORT:PyInit__{name} # Exports a function. - /IMPLIB:{base}.lib # Overrides the default import library name. - {libpaths_text} - {pythonflags.ldflags} - /OUT:{path_so} # Specifies the output file name. - {debug2} - /nologo - {libs_text} - {path_obj} - {linker_extra} - ''' - run_if( command, path_so, path_obj, prerequisites_link) + for path_source in [path_cpp] + source_extra: + path_o = f'{path_source}.obj' if windows() else f'{path_source}.o' + path_os.append(f' {path_o}') - else: + prerequisites_path = f'{path_o}.d' - # Not Windows. - # - command, pythonflags = base_compiler(cpp=cpp) + if windows(): + compiler_command2 = f''' + {compiler_command} + # General: + /c # Compiles without linking. + {EHsc} # Enable "Standard C++ exception handling". - # setuptools on Linux seems to use slightly different compile flags: - # - # -fwrapv -O3 -Wall -O2 -g0 -DPY_CALL_TRAMPOLINE - # + #/MD # Creates a multithreaded DLL using MSVCRT.lib. + {'/MDd' if debug else '/MD'} - general_flags = '' - if debug: - general_flags += ' -g' - if optimise: - general_flags += ' -O2 -DNDEBUG' + # Input/output files: + {T}{path_source} # /Tp specifies C++ source file. + /Fo{path_o} # Output file. 
codespell:ignore - py_limited_api3 = f'-DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else '' + # Include paths: + {includes_text} + {pythonflags.includes} # Include path for Python headers. - if darwin(): - # MacOS's linker does not like `-z origin`. - rpath_flag = "-Wl,-rpath,@loader_path/" + # Code generation: + {optimise2} + {debug2} + {permissive} # Set standard-conformance mode. - # Avoid `Undefined symbols for ... "_PyArg_UnpackTuple" ...'. - general_flags += ' -undefined dynamic_lookup' - elif pyodide(): - # Setting `-Wl,-rpath,'$ORIGIN',-z,origin` gives: - # emcc: warning: ignoring unsupported linker flag: `-rpath` [-Wlinkflags] - # wasm-ld: error: unknown -z value: origin - # - log0(f'pyodide: PEP-3149 suffix untested, so omitting. {_so_suffix()=}.') - path_so_leaf = f'_{name}.so' - rpath_flag = "-Wl,-rpath,'$ORIGIN'" - else: - rpath_flag = "-Wl,-rpath,'$ORIGIN',-z,origin" - path_so = f'{outdir}/{path_so_leaf}' - # Fun fact - on Linux, if the -L and -l options are before '{path_cpp}' - # they seem to be ignored... - # - prerequisites = list() + # Diagnostics: + #/FC # Display full path of source code files passed to cl.exe in diagnostic text. + /W3 # Sets which warning level to output. /W3 is IDE default. + /diagnostics:caret # Controls the format of diagnostic messages. + /nologo # - if pyodide(): - # Looks like pyodide's `cc` can't compile and link in one invocation. 
- prerequisites_compile_path = f'{path_cpp}.o.d' - prerequisites += _get_prerequisites( prerequisites_compile_path) - command = f''' - {command} - -fPIC - {general_flags.strip()} - {pythonflags.includes} - {includes_text} {defines_text} - -MD -MF {prerequisites_compile_path} - -c {path_cpp} - -o {path_cpp}.o {compiler_extra} + {py_limited_api3} ''' - prerequisites_link_path = f'{path_cpp}.o.d' - prerequisites += _get_prerequisites( prerequisites_link_path) - ld, _ = base_linker(cpp=cpp) - command += f''' - && {ld} - {path_cpp}.o - -o {path_so} - -MD -MF {prerequisites_link_path} - {rpath_flag} - {libpaths_text} - {libs_text} - {linker_extra} - {pythonflags.ldflags} - ''' - else: - # We use compiler to compile and link in one command. - prerequisites_path = f'{path_so}.d' - prerequisites = _get_prerequisites(prerequisites_path) - command = f''' - {command} + else: + compiler_command2 = f''' + {compiler_command} -fPIC - -shared {general_flags.strip()} {pythonflags.includes} {includes_text} {defines_text} - {path_cpp} -MD -MF {prerequisites_path} - -o {path_so} + -c {path_source} + -o {path_o} {compiler_extra} - {libpaths_text} - {linker_extra} - {pythonflags.ldflags} - {libs_text} - {rpath_flag} {py_limited_api3} ''' - command_was_run = run_if( - command, - path_so, - path_cpp, - prerequisites_compile, - prerequisites_link, - prerequisites, + run_if( + compiler_command2, + path_o, + path_source, + [path_source] + _get_prerequisites(prerequisites_path), ) - if command_was_run and darwin(): - # We need to patch up references to shared libraries in `libs`. 
- sublibraries = list() - for lib in () if libs is None else libs: - for libpath in libpaths: - found = list() - for suffix in '.so', '.dylib': - path = f'{libpath}/lib{os.path.basename(lib)}{suffix}' - if os.path.exists( path): - found.append( path) - if found: - assert len(found) == 1, f'More than one file matches lib={lib!r}: {found}' - sublibraries.append( found[0]) - break - else: - log2(f'Warning: can not find path of lib={lib!r} in libpaths={libpaths}') - macos_patch( path_so, *sublibraries) + # Link + prerequisites_path = f'{path_so}.d' + if windows(): + debug2 = '/DEBUG' if debug else '' + base, _ = os.path.splitext(path_so_leaf) + command2 = f''' + {linker_command} + /DLL # Builds a DLL. + /EXPORT:PyInit__{name} # Exports a function. + /IMPLIB:{base}.lib # Overrides the default import library name. + {libpaths_text} + {pythonflags.ldflags} + /OUT:{path_so} # Specifies the output file name. + {debug2} + /nologo + {libs_text} + {' '.join(path_os)} + {linker_extra} + ''' + elif pyodide(): + command2 = f''' + {linker_command} + -MD -MF {prerequisites_path} + -o {path_so} + {' '.join(path_os)} + {libpaths_text} + {libs_text} + {linker_extra} + {pythonflags.ldflags} + {rpath_flag} + ''' + else: + command2 = f''' + {linker_command} + -shared + {general_flags.strip()} + -MD -MF {prerequisites_path} + -o {path_so} + {' '.join(path_os)} + {libpaths_text} + {libs_text} + {linker_extra} + {pythonflags.ldflags} + {rpath_flag} + {py_limited_api3} + ''' + link_was_run = run_if( + command2, + path_so, + path_cpp, + *path_os, + *_get_prerequisites(f'{path_so}.d'), + ) + + if link_was_run and darwin(): + # We need to patch up references to shared libraries in `libs`. 
+ sublibraries = list() + for lib in () if libs is None else libs: + for libpath in libpaths: + found = list() + for suffix in '.so', '.dylib': + path = f'{libpath}/lib{os.path.basename(lib)}{suffix}' + if os.path.exists( path): + found.append( path) + if found: + assert len(found) == 1, f'More than one file matches lib={lib!r}: {found}' + sublibraries.append( found[0]) + break + else: + log2(f'Warning: can not find path of lib={lib!r} in libpaths={libpaths}') + macos_patch( path_so, *sublibraries) #run(f'ls -l {path_so}', check=0) #run(f'file {path_so}', check=0) @@ -2034,88 +2031,96 @@ def git_items( directory, submodules=False): def git_get( - remote, local, *, + remote=None, branch=None, + tag=None, + text=None, depth=1, env_extra=None, - tag=None, update=True, submodules=True, - default_remote=None, ): ''' - Ensures that is a git checkout (at either , or HEAD) - of a remote repository. - - Exactly one of and must be specified, or must start - with 'git:' and match the syntax described below. + Creates/updates local checkout of remote repository and returns + absolute path of . + + If is set but does not start with 'git:', it is assumed to be an up + to date local checkout, and we return absolute path of without doing + any git operations. Args: + local: + Local directory. Created and/or updated using `git clone` and `git + fetch` etc. remote: Remote git repostitory, for example - 'https://github.com/ArtifexSoftware/mupdf.git'. + 'https://github.com/ArtifexSoftware/mupdf.git'. Can be overridden + by . + branch: + Branch to use; can be overridden by . + tag: + Tag to use; can be overridden by . + text: + If None or empty: + Ignored. - If starts with 'git:', the remaining text should be a command-line - style string containing some or all of these args: - --branch - --tag - - These overrides , and . 
+ If starts with 'git:': + The remaining text should be a command-line + style string containing some or all of these args: + --branch + --tag + + These overrides , and . + Otherwise: + is assumed to be a local directory, and we simply return + it as an absolute path without doing any git operations. For example these all clone/update/branch master of https://foo.bar/qwerty.git to local checkout 'foo-local': - git_get('https://foo.bar/qwerty.git', 'foo-local', branch='master') - git_get('git:--branch master https://foo.bar/qwerty.git', 'foo-local') - git_get('git:--branch master', 'foo-local', default_remote='https://foo.bar/qwerty.git') - git_get('git:', 'foo-local', branch='master', default_remote='https://foo.bar/qwerty.git') - - local: - Local directory. If /.git exists, we attempt to run `git - update` in it. - branch: - Branch to use. Is used as default if remote starts with 'git:'. + git_get('foo-local', remote='https://foo.bar/qwerty.git', branch='master') + git_get('foo-local', text='git:--branch master https://foo.bar/qwerty.git') + git_get('foo-local', text='git:--branch master', remote='https://foo.bar/qwerty.git') + git_get('foo-local', text='git:', branch='master', remote='https://foo.bar/qwerty.git') depth: Depth of local checkout when cloning and fetching, or None. env_extra: Dict of extra name=value environment variables to use whenever we run git. - tag: - Tag to use. Is used as default if remote starts with 'git:'. update: If false we do not update existing repository. Might be useful if testing without network access. submodules: If true, we clone with `--recursive --shallow-submodules` and run `git submodule update --init --recursive` before returning. - default_remote: - The remote URL if starts with 'git:' but does not specify - the remote URL. 
''' log0(f'{remote=} {local=} {branch=} {tag=}') - if remote.startswith('git:'): - remote0 = remote - args = iter(shlex.split(remote0[len('git:'):])) - remote = default_remote - while 1: - try: - arg = next(args) - except StopIteration: - break - if arg == '--branch': - branch = next(args) - tag = None - elif arg == '--tag': - tag == next(args) - branch = None - else: - remote = arg - assert remote, f'{default_remote=} and no remote specified in remote={remote0!r}.' - assert branch or tag, f'{branch=} {tag=} and no branch/tag specified in remote={remote0!r}.' + + if text: + if text.startswith('git:'): + args = iter(shlex.split(text[len('git:'):])) + while 1: + try: + arg = next(args) + except StopIteration: + break + if arg == '--branch': + branch = next(args) + tag = None + elif arg == '--tag': + tag = next(args) + branch = None + else: + remote = arg + assert remote, f' unset and no remote specified in {text=}.' + assert branch or tag, f' and unset and no branch/tag specified in {text=}.' + else: + log0(f'Using local directory {text!r}.') + return os.path.abspath(text) - assert (branch and not tag) or (not branch and tag), f'Must specify exactly one of and .' + assert (branch and not tag) or (not branch and tag), f'Must specify exactly one of and ; {branch=} {tag=}.' depth_arg = f' --depth {depth}' if depth else '' @@ -2123,7 +2128,7 @@ def do_update(): # This seems to pull in the entire repository. log0(f'do_update(): attempting to update {local=}.') # Remove any local changes. - run(f'cd {local} && git checkout .', env_extra=env_extra) + run(f'cd {local} && git reset --hard', env_extra=env_extra) if tag: # `-u` avoids `fatal: Refusing to fetch into current branch`. # Using '+' and `revs/tags/` prefix seems to avoid errors like: @@ -2171,6 +2176,7 @@ def do_update(): # Show sha of checkout. 
run( f'cd {local} && git show --pretty=oneline|head -n 1', check=False) + return os.path.abspath(local) def run( @@ -2459,10 +2465,11 @@ def __init__(self): log2(f'### Have removed `-lcrypt` from ldflags: {self.ldflags!r} -> {ldflags2!r}') self.ldflags = ldflags2 - log1(f'{self.includes=}') - log1(f' {includes_=}') - log1(f'{self.ldflags=}') - log1(f' {ldflags_=}') + if 0: + log1(f'{self.includes=}') + log1(f' {includes_=}') + log1(f'{self.ldflags=}') + log1(f' {ldflags_=}') def macos_add_cross_flags(command): @@ -2640,13 +2647,34 @@ def run_if( command, out, *prerequisites): cmd = f.read() else: cmd = None - if command != cmd: + cmd_args = shlex.split(cmd or '') + command_args = shlex.split(command or '') + if command_args != cmd_args: if cmd is None: doit = 'No previous command stored' else: doit = f'Command has changed' if 0: - doit += f': {cmd!r} => {command!r}' + doit += f':\n {cmd!r}\n {command!r}' + if 0: + doit += f'\nbefore:\n' + doit += textwrap.indent(cmd, ' ') + doit += f'\nafter:\n' + doit += textwrap.indent(command, ' ') + if 1: + # Show diff based on commands split into pseudo lines by + # shlex.split(). + doit += ':\n' + lines = difflib.unified_diff( + (cmd or '').split(), + command.split(), + lineterm='', + ) + # Skip initial lines. + assert next(lines) == '--- ' + assert next(lines) == '+++ ' + for line in lines: + doit += f' {line}\n' if not doit: # See whether any prerequisites are newer than target. @@ -2677,7 +2705,7 @@ def _make_prerequisites(p): break if not doit: if pre_mtime > out_mtime: - doit = f'Prerequisite is new: {pre_path!r}' + doit = f'Prerequisite is new: {os.path.abspath(pre_path)!r}' if doit: # Remove `cmd_path` before we run the command, so any failure @@ -2687,16 +2715,16 @@ os.remove( cmd_path) except Exception: pass - log1( f'Running command because: {doit}') + log1( f'Running command because: {doit}', caller=2) - run( command) + run( command, caller=2) # Write the command we ran, into `cmd_path`. 
with open( cmd_path, 'w') as f: f.write( command) return True else: - log1( f'Not running command because up to date: {out!r}') + log1( f'Not running command because up to date: {out!r}', caller=2) if 0: log2( f'out_mtime={time.ctime(out_mtime)} pre_mtime={time.ctime(pre_mtime)}.' @@ -2988,21 +3016,22 @@ def get(self): for path, id_ in items.items(): id0 = self.items0.get(path) if id0 != id_: - #mtime0, hash0 = id0 - #mtime1, hash1 = id_ - #log0(f'New/modified file {path=}.') - #log0(f' {mtime0=} {"==" if mtime0==mtime1 else "!="} {mtime1=}.') - #log0(f' {hash0=} {"==" if hash0==hash1 else "!="} {hash1=}.') ret.append(path) return ret + def get_n(self, n): + ''' + Returns new files matching , asserting that there are + exactly . + ''' + ret = self.get() + assert len(ret) == n, f'{len(ret)=}: {ret}' + return ret def get_one(self): ''' Returns new match of , asserting that there is exactly one. ''' - ret = self.get() - assert len(ret) == 1, f'{len(ret)=}' - return ret[0] + return self.get_n(1)[0] def _file_id(self, path): mtime = os.stat(path).st_mtime with open(path, 'rb') as f: @@ -3032,7 +3061,7 @@ def swig_get(swig, quick, swig_local='pipcl-swig-git'): Args: swig: - If starts with 'git:', passed as arg to git_remote(). + If starts with 'git:', passed as arg to git_get(). quick: If true, we do not update/build local checkout if the binary is already present. @@ -3040,9 +3069,8 @@ def swig_get(swig, quick, swig_local='pipcl-swig-git'): path to use for checkout. ''' if swig and swig.startswith('git:'): - assert platform.system() != 'Windows' - swig_local = os.path.abspath(swig_local) - # Note that {swig_local}/install/bin/swig doesn't work on MacoS because + assert platform.system() != 'Windows', f'Cannot build swig on Windows.' + # Note that {swig_local}/install/bin/swig doesn't work on MacOS because # {swig_local}/INSTALL is a file and the fs is case-insensitive. 
swig_binary = f'{swig_local}/install-dir/bin/swig' if quick and os.path.isfile(swig_binary): @@ -3050,10 +3078,10 @@ def swig_get(swig, quick, swig_local='pipcl-swig-git'): else: # Clone swig. swig_env_extra = None - git_get( - swig, + swig_local = git_get( swig_local, - default_remote='https://github.com/swig/swig.git', + text=swig, + remote='https://github.com/swig/swig.git', branch='master', ) if darwin(): @@ -3070,8 +3098,11 @@ def swig_get(swig, quick, swig_local='pipcl-swig-git'): # run(f'brew install bison') PATH = os.environ['PATH'] - PATH = f'/opt/homebrew/opt/bison/bin:{PATH}' + prefix_bison = run('brew --prefix bison', capture=1).strip() + PATH = f'{prefix_bison}/bin:{PATH}' swig_env_extra = dict(PATH=PATH) + run(f'which bison') + run(f'which bison', env_extra=swig_env_extra) # Build swig. run(f'cd {swig_local} && ./autogen.sh', env_extra=swig_env_extra) run(f'cd {swig_local} && ./configure --prefix={swig_local}/install-dir', env_extra=swig_env_extra) @@ -3126,6 +3157,62 @@ def sysconfig_python_flags(): return includes_, ldflags_ +def venv_in(path=None): + ''' + If path is None, returns true if we are in a venv. Otherwise returns true + only if we are in venv . + ''' + if path: + return os.path.abspath(sys.prefix) == os.path.abspath(path) + else: + return sys.prefix != sys.base_prefix + + +def venv_run(args, path, recreate=True, clean=False): + ''' + Runs Python command inside venv and returns termination code. + + Args: + args: + List of args or string command. + path: + Path of venv directory. + recreate: + If false we do not run ` -m venv ` if + already exists. This avoids a delay in the common case where + is already set up, but fails if exists but does not contain + a valid venv. + clean: + If true we first delete . 
+ ''' + if clean: + log(f'Removing any existing venv {path}.') + assert path.startswith('venv-') + shutil.rmtree(path, ignore_errors=1) + if recreate or not os.path.isdir(path): + run(f'{sys.executable} -m venv {path}') + + if isinstance(args, str): + args_string = args + elif platform.system() == 'Windows': + # shlex not reliable on Windows so we use crude quoting with "...". + args_string = '' + for i, arg in enumerate(args): + assert '"' not in arg + if i: + args_string += ' ' + args_string += f'"{arg}"' + else: + args_string = shlex.join(args) + + if platform.system() == 'Windows': + command = f'{path}\\Scripts\\activate && python {args_string}' + else: + command = f'. {path}/bin/activate && python {args_string}' + e = run(command, check=0) + return e + + if __name__ == '__main__': # Internal-only limited command line support, used if # graal_legacy_python_config is true. diff --git a/scripts/test.py b/scripts/test.py index ee4b53bbe..2ea847a03 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -249,9 +249,9 @@ --show-args: Show sys.argv and exit. For debugging. - --sync-paths + --sync-paths <path> Do not run anything, instead write required files/directories/checkouts - to stdout, one per line. This is to help with automated running on + to <path>, one per line. This is to help with automated running on remote machines. 
--system-site-packages 0|1 @@ -520,12 +520,12 @@ def main(argv): elif _mupdf.startswith(':'): _branch = _mupdf[1:] _mupdf = f'git:--branch {_branch} https://github.com/ArtifexSoftware/mupdf.git' - os.environ['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf elif _mupdf.startswith('git:') or '://' in _mupdf: - os.environ['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf else: assert os.path.isdir(_mupdf), f'Not a directory: {_mupdf=}' - os.environ['PYMUPDF_SETUP_MUPDF_BUILD'] = os.path.abspath(_mupdf) + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = os.path.abspath(_mupdf) mupdf_sync = _mupdf elif arg == '--mupdf-clean': @@ -557,7 +557,7 @@ def main(argv): elif arg == '--show-args': show_args = 1 elif arg == '--sync-paths': - sync_paths = True + sync_paths = next(args) elif arg == '--system-site-packages': system_site_packages = int(next(args)) @@ -595,10 +595,11 @@ def main(argv): # Handle special args --sync-paths, -h, -v, -o first. # if sync_paths: - # Just print required files, directories and checkouts. - print(pymupdf_dir) - if mupdf_sync: - print(mupdf_sync) + # Print required files, directories and checkouts. + with open(sync_paths, 'w') as f: + print(pymupdf_dir, file=f) + if mupdf_sync: + print(mupdf_sync, file=f) return if show_help: @@ -634,7 +635,7 @@ def main(argv): if venv == 1 and os.path.exists(pyenv_dir) and os.path.exists(venv_name): log(f'{venv=} and {venv_name=} already exists so not building pyenv or creating venv.') else: - pipcl.git_get('https://github.com/pyenv/pyenv.git', pyenv_dir, branch='master') + pipcl.git_get(pyenv_dir, remote='https://github.com/pyenv/pyenv.git', branch='master') run(f'cd {pyenv_dir} && src/configure && make -C src') run(f'which pyenv') run(f'pyenv install -v -s {graalpy}') @@ -678,6 +679,13 @@ def main(argv): elif command == 'cibw': # Build wheel(s) with cibuildwheel. 
+ + if platform.system() == 'Linux': + PYMUPDF_SETUP_MUPDF_BUILD = env_extra.get('PYMUPDF_SETUP_MUPDF_BUILD') + if PYMUPDF_SETUP_MUPDF_BUILD and not PYMUPDF_SETUP_MUPDF_BUILD.startswith('git:'): + assert PYMUPDF_SETUP_MUPDF_BUILD.startswith('/') + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = f'/host/{PYMUPDF_SETUP_MUPDF_BUILD}' + cibuildwheel( env_extra, cibw_name or 'cibuildwheel', diff --git a/setup.py b/setup.py index fa8eece3f..c2731b1e0 100755 --- a/setup.py +++ b/setup.py @@ -88,11 +88,12 @@ Empty string: Build PyMuPDF with the system MuPDF. A string starting with 'git:': - Use `git clone` to get a MuPDF checkout. We use the - string in the git clone command; it must contain the git - URL from which to clone, and can also contain other `git - clone` args, for example: - PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master https://github.com/ArtifexSoftware/mupdf.git" + We use `git` commands to clone/update a local MuPDF checkout. + Should match `git:[--branch ][--tag ][]`. + If is omitted we use a default. + For example: + PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master" + Passed as arg to pipcl.git_get(). Otherwise: Location of mupdf directory. @@ -425,7 +426,7 @@ def git_patch(directory, patch, hard=False): mupdf_tgz = os.path.abspath( f'{__file__}/../mupdf.tgz') -def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): +def get_mupdf_internal(out, location=None, local_tgz=None): ''' Gets MuPDF as either a .tgz or a local directory. @@ -438,8 +439,6 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): If starts with 'git:', should be remote git location. Otherwise if containing '://' should be URL for .tgz. Otherwise should path of local mupdf checkout. - sha: - If not None and we use git clone, we checkout this sha. local_tgz: If not None, must be local .tgz file. Returns: @@ -451,7 +450,7 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): default location. 
''' - log(f'get_mupdf_internal(): {out=} {location=} {sha=}') + log(f'get_mupdf_internal(): {out=} {location=}') assert out in ('dir', 'tgz') if location is None: location = f'https://mupdf.com/downloads/archive/mupdf-{version_mupdf}-source.tar.gz' @@ -465,21 +464,15 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): if local_tgz: assert os.path.isfile(local_tgz) elif location.startswith( 'git:'): - location_git = location[4:] local_dir = 'mupdf-git' + pipcl.git_get(local_dir, text=location, remote='https://github.com/ArtifexSoftware/mupdf.git') - # Try to update existing checkout. - e = run(f'cd {local_dir} && git pull && git submodule update --init', check=False) - if e: - # No existing git checkout, so do a fresh clone. - _fs_remove(local_dir) - gitargs = location[4:] - run(f'git clone --recursive --depth 1 --shallow-submodules {gitargs} {local_dir}') - # Show sha of checkout. - run( f'cd {local_dir} && git show --pretty=oneline|head -n 1', check=False) - if sha: - run( f'cd {local_dir} && git checkout {sha}') + run( + f'cd {local_dir} && git show --pretty=oneline|head -n 1', + check = False, + prefix = 'mupdf git id: ', + ) elif '://' in location: # Download .tgz. 
local_tgz = os.path.basename( location) @@ -744,10 +737,12 @@ def int_or_0(text): except Exception: return 0 swig_version_tuple = tuple(int_or_0(i) for i in swig_version.split('.')) + version_p_tuple = tuple(int_or_0(i) for i in version_p.split('.')) log(f'{swig_version=}') text = '' text += f'mupdf_location = {mupdf_location!r}\n' text += f'pymupdf_version = {version_p!r}\n' + text += f'pymupdf_version_tuple = {version_p_tuple!r}\n' text += f'pymupdf_git_sha = {sha!r}\n' text += f'pymupdf_git_diff = {diff!r}\n' text += f'pymupdf_git_branch = {branch!r}\n' @@ -1405,9 +1400,6 @@ def platform_release_tuple(): ret.append(libclang) elif openbsd: print(f'OpenBSD: libclang not available via pip; assuming `pkg_add py3-llvm`.') - elif darwin and platform.machine() == 'arm64': - print(f'MacOS/arm64: forcing use of libclang 16.0.6 because 18.1.1 known to fail with `clang.cindex.TranslationUnitLoadError: Error parsing translation unit.`') - ret.append('libclang==16.0.6') elif darwin and platform_release_tuple() < (18,): # There are still of problems when building on old macos. ret.append('libclang==14.0.6') diff --git a/src/__init__.py b/src/__init__.py index 16182e130..c587c4977 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -383,6 +383,7 @@ def _int_rc(text): from ._build import pymupdf_git_diff # noqa F401 from ._build import pymupdf_git_sha # noqa F401 from ._build import pymupdf_version # noqa F401 +from ._build import pymupdf_version_tuple # noqa F401 from ._build import swig_version # noqa F401 from ._build import swig_version_tuple # noqa F401 @@ -393,7 +394,6 @@ def _int_rc(text): # Versions as tuples; useful when comparing versions. 
# -pymupdf_version_tuple = tuple( [_int_rc(i) for i in pymupdf_version.split('.')]) mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')]) assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \ @@ -25452,6 +25452,82 @@ def colors_wx_list(): return _wxcolors +def _mupdf_devel(make_links=True): + ''' + Allows PyMuPDF installation to be used to compile and link programmes that + use the MuPDF C/C++ API. + + Args: + make_links: + If true, then on non-windows we also create softlinks to any shared + libraries that are supplied with a version suffix; this allows them + to be used in a link command. + + For example we create links such as: + + site-packages/pymupdf/ + libmupdf.so -> libmupdf.so.26.7 + libmupdfcpp.so -> libmupdfcpp.so.26.7 + + Returns: (mupdf_include, mupdf_lib). + mupdf_include: + Path of MuPDF include directory within PyMuPDF install. + mupdf_lib + Path of MuPDF library directory within PyMuPDF install. + ''' + import platform + + log(f'{mupdf_version=}') + + p = os.path.normpath(f'{__file__}/..') + + mupdf_include = f'{p}/mupdf-devel/include' + + if platform.system() == 'Windows': + # Separate .lib files are used at build time. + mupdf_lib = f'{p}/mupdf-devel/lib' + else: + # .so files are used for both buildtime and runtime linking. + mupdf_lib = p + log(f'Within installed PyMuPDF:') + log(f' {mupdf_include=}') + log(f' {mupdf_lib=}') + + assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.' + assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.' + + if platform.system() != 'Windows' and make_links: + # Make symbolic links within the installed pymupdf module so + # that ld can find libmupdf.so etc. This is a bit of a hack, but + # necessary because wheels cannot contain symbolic links. + # + # For example we create `libmupdf.so -> libmupdf.so.24.8`. 
+ # + # We are careful to only create symlinks for the expected MuPDF + # version, in case old .so files from a previous install are still + # in place. + # + log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.') + regex_suffix = mupdf_version.split('.')[1:3] + regex_suffix = '[.]'.join(regex_suffix) + mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{regex_suffix}$' + log(f'{mupdf_lib_regex=}.') + for leaf in os.listdir(mupdf_lib): + m = re.match(mupdf_lib_regex, leaf) + if m: + pfrom = f'{mupdf_lib}/{m.group(1)}' + # os.path.exists() can return false if softlink exists + # but points to non-existent file, so we also use + # `os.path.islink()`. + if os.path.islink(pfrom) or os.path.exists(pfrom): + log(f'Removing existing link {pfrom=}.') + os.remove(pfrom) + log(f'Creating symlink: {pfrom} -> {leaf}') + os.symlink(leaf, pfrom) + + return mupdf_include, mupdf_lib + + # We cannot import utils earlier because it imports this .py file itself and # uses some pymupdf.* types in function typing. # diff --git a/tests/test_annots.py b/tests/test_annots.py index fa98216e7..834d85cdc 100644 --- a/tests/test_annots.py +++ b/tests/test_annots.py @@ -257,11 +257,13 @@ def test_1645(): ) doc.save(path_out, garbage=1, deflate=True, no_new_id=True) print(f'Have created {path_out}. comparing with {path_expected}.') - with open( path_out, 'rb') as f: - out = f.read() - with open( path_expected, 'rb') as f: - expected = f.read() - assert out == expected, f'Files differ: {path_out} {path_expected}' + with pymupdf.open(path_expected) as doc_expected, pymupdf.open(path_out) as doc_out: + rms = gentle_compare.pixmaps_rms( + doc_expected[0].get_pixmap(), + doc_out[0].get_pixmap(), + ) + print(f'test_1645: {rms=}') + assert rms < 0.1, f'Pixmaps differ: {path_expected=} {path_out=}' finally: # Restore annot_stem. 
pymupdf.TOOLS.set_annot_stem(annot_stem) diff --git a/tests/test_general.py b/tests/test_general.py index 8091023ef..97227ce51 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -2082,3 +2082,31 @@ def test_4590(): # Check pymupdf.Document.scrub() works. with pymupdf.open(path) as document: document.scrub() + + +def test_4702(): + path = util.download( + 'https://github.com/user-attachments/files/22403483/01995b6ca7837b52abaa24e38e8c076d.pdf', + 'test_4702.pdf', + ) + with pymupdf.open(path) as document: + for xref in range(1, document.xref_length()): + print(f'{xref=}') + try: + _ = document.xref_object(xref) + except Exception as e1: + print(f'{e1=}') + try: + document.update_object(xref, "<<>>") + except Exception as e2: + print(f'{e2=}') + raise + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'repairing PDF document' + + with pymupdf.open(path) as document: + for xref in range(1, document.xref_length()): + print(f'{xref=}') + _ = document.xref_object(xref) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'repairing PDF document'