From 8eea4eff20393dbc0c3e0b5723f1eaa3b61a248d Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Tue, 6 Jun 2017 00:00:06 +0100 Subject: [PATCH 1/8] [Trivial] White space, comments and line-wrap only in zipimport module. --- .../modules/zipimport/PyZipImporter.java | 70 ++++++++++++------- .../modules/zipimport/ZipImportModule.java | 28 ++++---- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index 202bfaf6c..405ee57eb 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -1,3 +1,4 @@ +/* Copyright (c) 2017 Jython Developers */ package org.python.modules.zipimport; import org.python.Version; @@ -37,15 +38,19 @@ @ExposedType(name = "zipimporter") public class PyZipImporter extends PyObject { + public static final PyType TYPE = PyType.fromClass(PyZipImporter.class); + /** Path to the Zip archive */ @ExposedGet public String archive; + /** File prefix: "a/sub/directory/" */ @ExposedGet public String prefix; - @ExposedGet + /** Dict with file info {path: tocEntry} */ + @ExposedGet(name = "_files") public PyObject files; public PyZipImporter(PyType type) { @@ -61,9 +66,14 @@ public PyZipImporter(String archivePath, String prefix, PyObject files) { @ExposedNew final static PyObject zipimporter_new(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { + PyObject[] args, String[] keywords) { ArgParser ap = new ArgParser("zipimporter", args, keywords, "archivepath"); String archivePath = ap.getString(0); + /* + * archivepath may be e.g. foo/bar.zip/lib, meaning to look for modules in the lib directory + * inside the ZIP file foo/bar.zip (provided that it exists). We must separate the archive + * (ZIP file) proper from the starting path within it, which is known as the "prefix". + */ Path archive = Paths.get(archivePath); while (archive != null) { if (Files.isRegularFile(archive)) { @@ -74,12 +84,17 @@ final static PyObject zipimporter_new(PyNewWrapper new_, boolean init, PyType su if (archive == null) { throw Py.ImportError(String.format("cannot handle %s", archivePath)); } + + // Expand to absolute path of ZIP as key to cache of files within it String filename = archive.toAbsolutePath().toString(); PyObject files = ZipImportModule._zip_directory_cache.__finditem__(filename); if (files == null) { files = readDirectory(filename); ZipImportModule._zip_directory_cache.__setitem__(filename, files); } + + // Recover the path within the archive from the original archivepath. + // XXX: possible bug: filename is absolute but archivePath might not be. Path.relativize? String prefix = ""; if (!filename.equals(archivePath)) { prefix = archivePath.substring(filename.length() + 1); @@ -98,8 +113,10 @@ public final PyObject zipimporter_exec_module(PyObject module) { } /** - * CPython zipimport module is very outdated, it's not yet compliant with PEP-451, the specs are checking the old behaviour - * this method and a few others that are deprecated a simply implemented to satisfy the test suite + * CPython zipimport module is very outdated, it's not yet compliant with PEP-451, the specs are + * checking the old behaviour this method and a few others that are deprecated a simply + * implemented to satisfy the test suite + * * @param fullname * @return a python module */ @@ -123,7 +140,6 @@ public final PyObject zipimporter_load_module(String fullname) { }); } - @ExposedMethod public final PyObject zipimporter_is_package(String fullname) { return getEntry(fullname, (entry, input) -> Py.newBoolean(entry._package)); @@ -162,16 +178,15 @@ public final PyObject zipimporter_get_data(String filename) { if (zipFile != null) { try { zipFile.close(); - } catch (IOException e) { - } + } catch (IOException e) {} } } } -// @ExposedMethod -// public final PyObject zipimporter_get_data(String fullname) { -// throw ZipImportModule.ZipImportError(fullname); -// } +// @ExposedMethod +// public final PyObject zipimporter_get_data(String fullname) { +// throw ZipImportModule.ZipImportError(fullname); +// } @ExposedMethod public final PyObject zipimporter_get_filename(String fullname) { @@ -190,17 +205,20 @@ public final PyObject zipimporter_get_code(String fullname) { try { codeBytes = imp.readCode(fullname, inputStream, false, mtime); } catch (IOException ioe) { - throw Py.ImportError(ioe.getMessage() + "[path=" + entry.path(fullname) + "]"); + throw Py.ImportError( + ioe.getMessage() + "[path=" + entry.path(fullname) + "]"); } } else { try { byte[] bytes = FileUtil.readBytes(inputStream); - codeBytes = imp.compileSource(fullname, new ByteArrayInputStream(bytes), entry.path(fullname)); + codeBytes = imp.compileSource(fullname, new ByteArrayInputStream(bytes), + entry.path(fullname)); } catch (IOException e) { throw ZipImportModule.ZipImportError(e.getMessage()); } } - return BytecodeLoader.makeCode(fullname + Version.PY_CACHE_TAG, codeBytes, entry.path(fullname)); + return BytecodeLoader.makeCode(fullname + Version.PY_CACHE_TAG, codeBytes, + entry.path(fullname)); }); } catch (IOException e) { throw ZipImportModule.ZipImportError(e.getMessage()); @@ -249,8 +267,7 @@ private T getEntry(String fullname, BiFunction if (zipFile != null) { try { zipFile.close(); - } catch (IOException e) { - } + } catch (IOException e) {} } } } @@ -272,14 +289,16 @@ private static PyObject readDirectory(String archive) { PySystemState sys = Py.getSystemState(); File file = new File(sys.getPath(archive)); if (!file.canRead()) { - throw ZipImportModule.ZipImportError(String.format("can't open Zip file: '%s'", archive)); + throw ZipImportModule + .ZipImportError(String.format("can't open Zip file: '%s'", archive)); } ZipFile zipFile; try { zipFile = new ZipFile(file); } catch (IOException ioe) { - throw ZipImportModule.ZipImportError(String.format("can't read Zip file: '%s'", archive)); + throw ZipImportModule + .ZipImportError(String.format("can't read Zip file: '%s'", archive)); } PyObject files = new PyDictionary(); @@ -300,6 +319,7 @@ private static PyObject readDirectory(String archive) { * * A tocEntry is a tuple: * + *
      *     (__file__,     # value to use for __file__, available for all files
      *     compress,      # compression kind; 0 for uncompressed
      *     data_size,     # size of compressed data on disk
@@ -309,16 +329,17 @@ private static PyObject readDirectory(String archive) {
      *     date,          # mod data of file (in dos format)
      *     crc,           # crc checksum of the data
      *     )
+     *
* - * Directories can be recognized by the trailing SEP in the name, data_size and - * file_offset are 0. + * Directories can be recognized by the trailing os.sep in the name, + * data_size and file_offset are 0. * * @param zipFile ZipFile to read * @param files a dict-like PyObject */ private static void readZipFile(ZipFile zipFile, PyObject files) { - for (Enumeration zipEntries = zipFile.entries(); - zipEntries.hasMoreElements();) { + for (Enumeration zipEntries = zipFile.entries(); zipEntries + .hasMoreElements();) { ZipEntry zipEntry = zipEntries.nextElement(); String name = zipEntry.getName().replace('/', File.separatorChar); @@ -334,13 +355,14 @@ private static void readZipFile(ZipFile zipFile, PyObject files) { PyObject date = new PyLong(zipEntry.getTime()); PyObject crc = new PyLong(zipEntry.getCrc()); - PyTuple entry = new PyTuple(file, compress, data_size, file_size, file_offset, - time, date, crc); + PyTuple entry = + new PyTuple(file, compress, data_size, file_size, file_offset, time, date, crc); files.__setitem__(new PyUnicode(name), entry); } } class ModuleEntry { + private boolean _package; private boolean binary; diff --git a/src/org/python/modules/zipimport/ZipImportModule.java b/src/org/python/modules/zipimport/ZipImportModule.java index bb01411ea..db9b7218c 100644 --- a/src/org/python/modules/zipimport/ZipImportModule.java +++ b/src/org/python/modules/zipimport/ZipImportModule.java @@ -1,20 +1,18 @@ -/* Copyright (c) 2007 Jython Developers */ +/* Copyright (c) 2017 Jython Developers */ package org.python.modules.zipimport; import org.python.core.BuiltinDocs; -import org.python.core.ClassDictInit; import org.python.core.Py; import org.python.core.PyDictionary; import org.python.core.PyException; import org.python.core.PyObject; -import org.python.core.PyBytes; import org.python.core.PyStringMap; import org.python.expose.ExposedModule; import org.python.expose.ModuleInit; /** - * This module adds the ability to import Python modules (*.py, - * *$py.class) and packages from ZIP-format archives. + * This module adds the ability to import Python modules (*.py, + * *$py.class) and packages from ZIP-format archives. * * @author Philip Jenvey */ @@ -22,11 +20,13 @@ public class ZipImportModule { public static PyObject ZipImportError; + public static PyException ZipImportError(String message) { return new PyException(ZipImportError, message); } - // FIXME this cache should be per PySystemState, but at the very least it should also be weakly referenced! + // FIXME this cache should be per PySystemState, but at the very least it should also be weakly + // referenced! // FIXME could also do this via a loading cache instead public static PyDictionary _zip_directory_cache = new PyDictionary(); @@ -37,15 +37,15 @@ public static void init(PyObject dict) { dict.__setitem__("ZipImportError", ZipImportError); } - /** - * Initialize the ZipImportError exception during startup - * - */ + /** Initialize the ZipImportError exception during startup */ public static void initClassExceptions(PyObject exceptions) { PyObject ImportError = exceptions.__finditem__("ImportError"); - ZipImportError = Py.makeClass("zipimport.ZipImportError", ImportError, - new PyStringMap() {{ - __setitem__("__module__", Py.newString("zipimport")); - }}); + + ZipImportError = Py.makeClass("zipimport.ZipImportError", ImportError, new PyStringMap() { + + { + __setitem__("__module__", Py.newString("zipimport")); + } + }); } } From 8ec87205c96392950a72b6c4d21a819a3c810ac8 Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Tue, 6 Jun 2017 19:34:20 +0100 Subject: [PATCH 2/8] Expose PyZipImporter through CoreExposed.includes This cures the constructor-related exception on attempts at importing modules, but there are still problems with PyZipImporter. Also improved some comments. --- CoreExposed.includes | 2 +- src/org/python/modules/zipimport/PyZipImporter.java | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CoreExposed.includes b/CoreExposed.includes index 5ba76ff30..661cc7e43 100644 --- a/CoreExposed.includes +++ b/CoreExposed.includes @@ -144,7 +144,7 @@ org/python/modules/subprocess/PyPopen.class org/python/modules/thread/PyLocal.class org/python/modules/time/PyTimeTuple.class org/python/modules/unicodedata/UCD.class -org/python/modules/zipimport/zipimporter.class +org/python/modules/zipimport/PyZipImporter.class org/python/modules/zlib/PyCompress.class org/python/modules/zlib/PyDecompress.class org/python/antlr/AST.class diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index 405ee57eb..d491271d2 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -3,6 +3,7 @@ import org.python.Version; import org.python.core.ArgParser; +import org.python.core.BuiltinDocs; import org.python.core.BytecodeLoader; import org.python.core.Py; import org.python.core.PyBytes; @@ -36,20 +37,23 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -@ExposedType(name = "zipimporter") +@ExposedType(name = "zipimport.zipimporter", doc = BuiltinDocs.zipimport_zipimporter_doc) public class PyZipImporter extends PyObject { public static final PyType TYPE = PyType.fromClass(PyZipImporter.class); - /** Path to the Zip archive */ + /** + * Path to the ZIP archive: "path/to/archive.zip" if constructed from + * "path/to/archive.zip/a/subdir". + */ @ExposedGet public String archive; - /** File prefix: "a/sub/directory/" */ + /** File prefix: "a/subdir/" if constructed from "path/to/archive.zip/a/subdir". */ @ExposedGet public String prefix; - /** Dict with file info {path: tocEntry} */ + /** Dictionary with file information {path: tocEntry} */ @ExposedGet(name = "_files") public PyObject files; From 5ed6d3749c9126bf522fa72db9af60777502fabc Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Mon, 12 Jun 2017 22:40:56 +0100 Subject: [PATCH 3/8] Re-work PyZipImoprter construction closer to CPython and add unit test. We use java.nio.Path extensively to achieve platform independence and to support (and preserve) relative paths exactly as CPython does. Because Jython does not yet work well enough to run standard unit tests, we start a thorough Java-based test. --- .../modules/zipimport/PyZipImporter.java | 201 +++++++++----- .../zipimport/PyZipImporterDerived.java | 4 +- src/templates/zipimporter.derived | 2 +- .../modules/zipimport/ZipImportTest.java | 249 ++++++++++++++++++ 4 files changed, 387 insertions(+), 69 deletions(-) create mode 100644 tests/java/org/python/modules/zipimport/ZipImportTest.java diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index d491271d2..db5c8b951 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -14,7 +14,6 @@ import org.python.core.PyModule; import org.python.core.PyNewWrapper; import org.python.core.PyObject; -import org.python.core.PySystemState; import org.python.core.PyTuple; import org.python.core.PyType; import org.python.core.PyUnicode; @@ -44,69 +43,119 @@ public class PyZipImporter extends PyObject { /** * Path to the ZIP archive: "path/to/archive.zip" if constructed from - * "path/to/archive.zip/a/subdir". + * "path/to/archive.zip/a/sub/directory". */ + // XXX In CPython it is "decoded from the FS encoding" (by PyUnicode_FSDecoder, during __init__) @ExposedGet - public String archive; + public final String archive; - /** File prefix: "a/subdir/" if constructed from "path/to/archive.zip/a/subdir". */ + /** + * File prefix: "a/sub/directory/", if constructed from "path/to/archive.zip/a/sub/directory". + */ + // XXX here CPython says "encoded to the FS encoding" but that doesn't seem to be accurate @ExposedGet - public String prefix; + public final String prefix; - /** Dictionary with file information {path: tocEntry} */ + /** Dictionary with information on each file in the ZIP {path: tocEntry} */ @ExposedGet(name = "_files") - public PyObject files; + public final PyObject files; - public PyZipImporter(PyType type) { - super(TYPE); + /** + * Construct a PyZipImporter for the given path, which may include sub-directories + * within the ZIP file, for example path/to/archive.zip/a/sub/directory. Because + * equivalence between ZIP files and sub-directories in Python import (see + * PEP 273), a + * PyZipImporter operates using the platform-specific file separator ("\" on + * Windows) at all public interfaces, so on that platform a path like + * path\to\archive.zip\a\sub\directory will normally be supplied. However, we + * follow CPython in tolerating either "/" or the platform-specific file separator in the + * archivePath, or indeed any mixture of the two. + * + * @param archivePath path to archive and optionally a sub-directory within it + */ + public PyZipImporter(String archivePath) { + this(TYPE, archivePath); } - public PyZipImporter(String archivePath, String prefix, PyObject files) { - this(TYPE); - this.archive = archivePath; - this.prefix = prefix; - this.files = files; - } + /** + * Equivalent to {@link PyZipImporter#PyZipImporter(String)} when sub-class required. + * + * @param type of sub-class + * @param archivePath path to archive and optionally a sub-directory within it + */ + public PyZipImporter(PyType type, String archivePath) { + super(type); + + if (archivePath == null || archivePath.length() == 0) { + throw ZipImportModule.ZipImportError("archive path is empty"); + } + archivePath = toPlatformSeparator(archivePath); - @ExposedNew - final static PyObject zipimporter_new(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { - ArgParser ap = new ArgParser("zipimporter", args, keywords, "archivepath"); - String archivePath = ap.getString(0); /* - * archivepath may be e.g. foo/bar.zip/lib, meaning to look for modules in the lib directory - * inside the ZIP file foo/bar.zip (provided that it exists). We must separate the archive - * (ZIP file) proper from the starting path within it, which is known as the "prefix". + * archivepath may be e.g. "path/to/archive.zip/a/sub/directory", meaning we must look for + * modules in the lib directory inside the ZIP file path/to/archive.zip (provided that it + * exists). We must separate the archive (ZIP file) proper from the starting path within it, + * which is known as the "prefix". */ - Path archive = Paths.get(archivePath); - while (archive != null) { + Path fullPath = Paths.get(archivePath), archive = fullPath; + int prefixEnd = archive.getNameCount(); + int prefixStart = prefixEnd; + + // Strip elements from end of path until empty, a file or a directory + for (archive = fullPath; prefixStart > 1; archive = archive.getParent(), prefixStart--) { if (Files.isRegularFile(archive)) { break; + } else if (Files.isDirectory(archive)) { + // Stripping names got us to a directory: no ZIP file here + archive = null; + break; } - archive = archive.getParent(); } - if (archive == null) { - throw Py.ImportError(String.format("cannot handle %s", archivePath)); + + if (archive == null || archive.getNameCount() == 0) { + throw ZipImportModule.ZipImportError(String.format("not a Zip file: %s", archivePath)); } - // Expand to absolute path of ZIP as key to cache of files within it - String filename = archive.toAbsolutePath().toString(); - PyObject files = ZipImportModule._zip_directory_cache.__finditem__(filename); + // Look up, or add if necessary, an entry in the cache for the files. + this.archive = archive.toString(); + PyObject files = ZipImportModule._zip_directory_cache.__finditem__(this.archive); if (files == null) { - files = readDirectory(filename); - ZipImportModule._zip_directory_cache.__setitem__(filename, files); + /* + * This is new. Make a cache entry that enumerates the files in the ZIP. This is also + * where we throw if we can't read it as a ZIP. + */ + files = readDirectory(archive); + ZipImportModule._zip_directory_cache.__setitem__(this.archive, files); } + this.files = files; - // Recover the path within the archive from the original archivepath. - // XXX: possible bug: filename is absolute but archivePath might not be. Path.relativize? - String prefix = ""; - if (!filename.equals(archivePath)) { - prefix = archivePath.substring(filename.length() + 1); - if (!prefix.endsWith(File.separator)) { - prefix += File.separator; - } + // The prefix is that part of the original archivePath that is not in archive. + if (prefixStart < prefixEnd) { + // There was a prefix + Path prefix = fullPath.subpath(prefixStart, prefixEnd); + this.prefix = prefix.toString() + File.separator; + } else { + this.prefix = ""; } - return new PyZipImporter(filename, prefix, files); + } + + /** + * __new__ method equivalent to {@link PyZipImporter#PyZipImporter(String)}. + */ + @ExposedNew + final static PyObject zipimporter_new(PyNewWrapper new_, boolean init, PyType subtype, + PyObject[] args, String[] keywords) { + ArgParser ap = new ArgParser("zipimporter", args, keywords, "archivepath"); + String archivePath = ap.getString(0); + // XXX Should FS-decode args[0] here. CPython uses PyUnicode_FSDecoder, during __init__ + return new PyZipImporter(archivePath); + } + + + @Override + public String toString() { + Path archivePath = Paths.get(archive, prefix); + return String.format("", archivePath); } @ExposedMethod @@ -254,6 +303,32 @@ final PyObject zipimporter_find_spec(PyObject[] args, String[] keywords) { }); } + /** + * Return path with every `\`character replaced by {@link File#pathSeparatorChar}, if that's + * different. + */ + private static String toPlatformSeparator(String path) { + if (File.separatorChar == '/') { + return path; + } else { + return path.replace('/', File.separatorChar); + } + } + + /** + * Return path with every {@link File#pathSeparatorChar} character replaced by `\`, if that's + * different. We need this because Python treats paths into zip files as equivalent to paths in + * the file system, hence localises the separator to the platform, while zip files themselves + * use '/' consistently internally. + */ + private static String fromPlatformSeparator(String path) { + if (File.separatorChar == '/') { + return path; + } else { + return path.replace(File.separatorChar, '/'); + } + } + private T getEntry(String fullname, BiFunction func) { ZipFile zipFile = null; try { @@ -289,33 +364,21 @@ private ModuleEntry[] entries() { return res; } - private static PyObject readDirectory(String archive) { - PySystemState sys = Py.getSystemState(); - File file = new File(sys.getPath(archive)); - if (!file.canRead()) { + private static PyObject readDirectory(Path archive) { + + if (!Files.isReadable(archive)) { throw ZipImportModule .ZipImportError(String.format("can't open Zip file: '%s'", archive)); } - ZipFile zipFile; - try { - zipFile = new ZipFile(file); + try (ZipFile zipFile = new ZipFile(archive.toFile())) { + PyObject files = new PyDictionary(); + readZipFile(zipFile, files); + return files; } catch (IOException ioe) { throw ZipImportModule .ZipImportError(String.format("can't read Zip file: '%s'", archive)); } - - PyObject files = new PyDictionary(); - try { - readZipFile(zipFile, files); - } finally { - try { - zipFile.close(); - } catch (IOException ioe) { - throw Py.IOError(ioe); - } - } - return files; } /** @@ -342,18 +405,23 @@ private static PyObject readDirectory(String archive) { * @param files a dict-like PyObject */ private static void readZipFile(ZipFile zipFile, PyObject files) { + // Iterate over the entries and build an informational tuple for each + final String zipNameAndSep = zipFile.getName() + File.separator; for (Enumeration zipEntries = zipFile.entries(); zipEntries .hasMoreElements();) { + // Oh for Java 9 and Enumeration.asIterator() ZipEntry zipEntry = zipEntries.nextElement(); - String name = zipEntry.getName().replace('/', File.separatorChar); + String name = toPlatformSeparator(zipEntry.getName()); + // XXX: Java zip file uses UTF-8 internally. Is there an encoding issue here? + PyObject file = new PyUnicode(zipNameAndSep + name); - PyObject file = new PyUnicode(zipFile.getName() + File.separator + name); PyObject compress = new PyLong(zipEntry.getMethod()); PyObject data_size = new PyLong(zipEntry.getCompressedSize()); PyObject file_size = new PyLong(zipEntry.getSize()); - // file_offset is a CPython optimization; it's used to seek directly to the - // file when reading it later. Jython doesn't do this nor is the offset - // available + /* + * file_offset is a CPython optimization; it's used to seek directly to the file when + * reading it later. Jython doesn't do this nor is the offset available + */ PyObject file_offset = new PyLong(-1); PyObject time = new PyLong(zipEntry.getTime()); PyObject date = new PyLong(zipEntry.getTime()); @@ -361,6 +429,7 @@ private static void readZipFile(ZipFile zipFile, PyObject files) { PyTuple entry = new PyTuple(file, compress, data_size, file_size, file_offset, time, date, crc); + files.__setitem__(new PyUnicode(name), entry); } } diff --git a/src/org/python/modules/zipimport/PyZipImporterDerived.java b/src/org/python/modules/zipimport/PyZipImporterDerived.java index 1ac0b76fb..928283ff7 100644 --- a/src/org/python/modules/zipimport/PyZipImporterDerived.java +++ b/src/org/python/modules/zipimport/PyZipImporterDerived.java @@ -73,8 +73,8 @@ public void delDict() { dict=new PyStringMap(); } - public PyZipImporterDerived(PyType subtype) { - super(subtype); + public PyZipImporterDerived(PyType subtype,String archivePath) { + super(subtype,archivePath); slots=new PyObject[subtype.getNumSlots()]; dict=subtype.instDict(); if (subtype.needsFinalizer()) { diff --git a/src/templates/zipimporter.derived b/src/templates/zipimporter.derived index 405e14148..55862bff9 100644 --- a/src/templates/zipimporter.derived +++ b/src/templates/zipimporter.derived @@ -1,4 +1,4 @@ base_class: PyZipImporter want_dict: true -ctr: +ctr: String archivePath incl: object diff --git a/tests/java/org/python/modules/zipimport/ZipImportTest.java b/tests/java/org/python/modules/zipimport/ZipImportTest.java new file mode 100644 index 000000000..d73d7af34 --- /dev/null +++ b/tests/java/org/python/modules/zipimport/ZipImportTest.java @@ -0,0 +1,249 @@ +/* Copyright (c) 2017 Jython Developers */ +package org.python.modules.zipimport; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.io.File; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.python.core.PyDictionary; +import org.python.core.PyObject; +import org.python.core.PyTuple; +import org.python.core.PyUnicode; + +/** + * Java level tests for {@link PyZipImporter}. If this module does not meet its expected behaviours, + * Jython pretty much won't start, so a test independent of the interpreter seems a good idea. + */ +public class ZipImportTest { + + /** Shorthand for platform-specific file separator character */ + static final char SEP = File.separatorChar; + + /** In the standard library is a ZIP file at this (Unix-style) location: */ + static final String ARCHIVE = + platform("dist/Lib/test/test_importlib/namespace_pkgs/top_level_portion1.zip"); + + // (Relative) paths of sub-directories to the ZIP file and files within them. + static final String FOOKEY = platform("foo/"); + static final String ONEKEY = platform("foo/one.py"); + + @Before + public void setUp() throws Exception {} + + /** Swap '/' for platform file name separator if different. */ + private static String platform(String path) { + return (SEP == '/') ? path : path.replace('/', SEP); + } + + @After + public void tearDown() throws Exception { + // Empty the cache for a clean start next time + PyDictionary cache = ZipImportModule._zip_directory_cache; + for (PyObject item : cache.asIterable()) { + cache.__delitem__(item); + } + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#PyZipImporter(java.lang.String)} where the + * archive path is relative to the working directory. + */ + @Test + public void testPyZipImporterRelative() { + testPyZipImporterHelper(ARCHIVE); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#PyZipImporter(java.lang.String)}. where the + * archive path is absolute. + */ + @Test + public void testPyZipImporterAbsolute() { + // Make the zip file location absolute + Path path = Paths.get(ARCHIVE); + testPyZipImporterHelper(path.toAbsolutePath().toString()); + } + + /** + * Test the constructor using a given path as the path to the base archive (just the zip). + * + * @param archive path to zip (which may be absolute or relative) + */ + private void testPyZipImporterHelper(String archive) { + + // Test where the archive path is just the zip file location + PyZipImporter za = testPyZipImporterHelper(archive, archive, ""); + + // Test where the archive path is extended with a sub-directory within the zip + String fooPath = Paths.get(archive, FOOKEY).toString(); + PyZipImporter zf = testPyZipImporterHelper(fooPath, archive, FOOKEY); + assertEquals("cache entry not re-used", za.files, zf.files); + + // If the platform is not Unix-like, check that a Unix-like path has the same result + if (SEP != '/') { + // Turn platform-specific archive path to Unix-like + String archiveAlt = archive.replace(SEP, '/'); + // Expected result still uses platform-specific separators (in API though not in ZIP). + PyZipImporter za2 = testPyZipImporterHelper(archiveAlt, archive, ""); + assertEquals("cache entry not re-used", za.files, za2.files); + String fooPathAlt = fooPath.replace(SEP, '/'); + PyZipImporter zf2 = testPyZipImporterHelper(fooPathAlt, archive, FOOKEY); + assertEquals("cache entry not re-used", za.files, zf2.files); + } + + // Check that an extra SEP on the end does not upset the constructor + PyZipImporter zax = testPyZipImporterHelper(archive+SEP, archive, ""); + assertEquals("cache entry not re-used", za.files, zax.files); + PyZipImporter zfx = testPyZipImporterHelper(fooPath+SEP, archive, FOOKEY); + assertEquals("cache entry not re-used", za.files, zfx.files); + + } + + /** + * Helper to test construction of a zipimporter. + * + * @param archivePath argument to zipimporter constructor + * @param archive expected base archive name + * @param prefix expected prefix (sub-directory within ZIP) + * @return the zipimporter we created for further tests + */ + private PyZipImporter testPyZipImporterHelper(String archivePath, String archive, + String prefix) { + + PyZipImporter z = new PyZipImporter(archivePath); + assertEquals(archive, z.archive); + assertEquals(prefix, z.prefix); + + // The files attribute should enumerate the files and directories. + Map map = mapFromFiles(z.files); + + // Check in the map that we got what we should + assertEquals(2, map.size()); + String fooPath = Paths.get(archive,FOOKEY).toString() + SEP; // subdir gets SEP + assertEquals(fooPath, map.get(FOOKEY).__getitem__(0).toString()); + String onePath = Paths.get(archive,ONEKEY).toString(); // file gets no SEP + assertEquals(onePath, map.get(ONEKEY).__getitem__(0).toString()); + + return z; + } + + /** Copy the _files attribute into a Java map for testing. */ + private Map mapFromFiles(PyObject files) { + Map map = new HashMap<>(); + for (PyObject item : files.asIterable()) { + String key = ((PyUnicode) item).getString(); + PyTuple value = ((PyTuple) files.__finditem__(item)); + map.put(key, value); + } + return map; + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_exec_module(org.python.core.PyObject)}. + */ + // @Test + public void testZipimporter_exec_module() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_load_module(java.lang.String)}. + */ + // @Test + public void testZipimporter_load_module() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_is_package(java.lang.String)}. + */ + // @Test + public void testZipimporter_is_package() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_source(java.lang.String)}. + */ + // @Test + public void testZipimporter_get_source() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_data(java.lang.String)}. + */ + // @Test + public void testZipimporter_get_data() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_filename(java.lang.String)}. + */ + // @Test + public void testZipimporter_get_filename() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_code(java.lang.String)}. + */ + // @Test + public void testZipimporter_get_code() { + fail("Not yet implemented"); + } + + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_find_spec(org.python.core.PyObject[], java.lang.String[])}. + */ + // @Test + public void testZipimporter_find_spec() { + fail("Not yet implemented"); + } + + /** + * Test method for {@link java.lang.Object#toString()}. + */ + @Test + public void test__repr__() { + + // Test relative path + PyZipImporter z = new PyZipImporter(ARCHIVE); + String expected = String.format("", ARCHIVE); + PyUnicode actual = z.__repr__(); + assertEquals(expected, actual.getString()); + + // Test absolute path + String abspath = Paths.get(ARCHIVE).toAbsolutePath().toString(); + z = new PyZipImporter(abspath); + expected = String.format("", abspath); + actual = z.__repr__(); + assertEquals(expected, actual.getString()); + + // Test path with subfolder + String fooPath = Paths.get(ARCHIVE, FOOKEY).toString(); // Strips trailing SEP in FOOKEY + z = new PyZipImporter(fooPath); + expected = String.format("", fooPath); + actual = z.__repr__(); + assertEquals(expected, actual.getString()); + } + +} From c47bf0c3761ed249a5c9f4cc3c22c3853b7e31de Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Wed, 14 Jun 2017 20:23:59 +0100 Subject: [PATCH 4/8] Add comment specifying zipimporter.get_data and a test The test expresses the behaviour of get_data in CPython 3.5.2, and fails in every call at present (on Windows), so is disabled in the unit test. --- .../modules/zipimport/PyZipImporter.java | 32 +++++++++-- .../modules/zipimport/ZipImportTest.java | 57 ++++++++++++++++--- 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index db5c8b951..8e1acfbf9 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -212,8 +212,35 @@ public final PyObject zipimporter_get_source(String fullname) { }); } + /** + * Given a path to a file within this archive, retrieve the contents of that file as bytes. By + * "a path to a file within this archive" we mean either that the path begins with the archive + * name (and the rest of it identifies a file within the archive), or that it identifies a file + * within the archive (i.e. is relative to this archive). Note that even when the zipimporter + * was constructed with a sub-directory as target, a path not beginning with the archive path is + * interpreted relative to the base archive, not to the sub-directory: + * + *
+     * >>> zf = zi(archive+"/foo")
+     * >>> zf.get_data(archive + "/foo/one.py")
+     * b"attr = 'portion1 foo one'\n"
+     * >>> zf.get_data("foo/one.py")
+     * b"attr = 'portion1 foo one'\n"
+     * >>> zf.get_data("one.py")
+     * Traceback (most recent call last):
+     * File "", line 1, in 
+     * OSError: [Errno 0] Error: 'one.py'
+     * 
+ * + * Note also that even where the platform file path separator differs from '/' (i.e. on + * Windows), either that or '/' is acceptable in this context. + * + * @param filename to the file within the archive + * @return the contents + */ @ExposedMethod public final PyObject zipimporter_get_data(String filename) { + // XXX Possibly filename should be an object and if byte-like FS-decoded. ZipFile zipFile = null; if (filename.startsWith(archive)) { filename = filename.substring(archive.length() + 1); @@ -236,11 +263,6 @@ public final PyObject zipimporter_get_data(String filename) { } } -// @ExposedMethod -// public final PyObject zipimporter_get_data(String fullname) { -// throw ZipImportModule.ZipImportError(fullname); -// } - @ExposedMethod public final PyObject zipimporter_get_filename(String fullname) { return getEntry(fullname, (entry, inputStream) -> { diff --git a/tests/java/org/python/modules/zipimport/ZipImportTest.java b/tests/java/org/python/modules/zipimport/ZipImportTest.java index d73d7af34..e1c6d89de 100644 --- a/tests/java/org/python/modules/zipimport/ZipImportTest.java +++ b/tests/java/org/python/modules/zipimport/ZipImportTest.java @@ -5,13 +5,17 @@ import static org.junit.Assert.fail; import java.io.File; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; + import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.python.core.PyBytes; import org.python.core.PyDictionary; import org.python.core.PyObject; import org.python.core.PyTuple; @@ -42,6 +46,11 @@ private static String platform(String path) { return (SEP == '/') ? path : path.replace('/', SEP); } + /** Swap platform file name separator for '/' if different. */ + private static String unplatform(String path) { + return (SEP == '/') ? path : path.replace(SEP, '/'); + } + @After public void tearDown() throws Exception { // Empty the cache for a clean start next time @@ -91,19 +100,19 @@ private void testPyZipImporterHelper(String archive) { // If the platform is not Unix-like, check that a Unix-like path has the same result if (SEP != '/') { // Turn platform-specific archive path to Unix-like - String archiveAlt = archive.replace(SEP, '/'); + String archiveAlt = unplatform(archive); // Expected result still uses platform-specific separators (in API though not in ZIP). PyZipImporter za2 = testPyZipImporterHelper(archiveAlt, archive, ""); assertEquals("cache entry not re-used", za.files, za2.files); - String fooPathAlt = fooPath.replace(SEP, '/'); + String fooPathAlt = unplatform(fooPath); PyZipImporter zf2 = testPyZipImporterHelper(fooPathAlt, archive, FOOKEY); assertEquals("cache entry not re-used", za.files, zf2.files); } // Check that an extra SEP on the end does not upset the constructor - PyZipImporter zax = testPyZipImporterHelper(archive+SEP, archive, ""); + PyZipImporter zax = testPyZipImporterHelper(archive + SEP, archive, ""); assertEquals("cache entry not re-used", za.files, zax.files); - PyZipImporter zfx = testPyZipImporterHelper(fooPath+SEP, archive, FOOKEY); + PyZipImporter zfx = testPyZipImporterHelper(fooPath + SEP, archive, FOOKEY); assertEquals("cache entry not re-used", za.files, zfx.files); } @@ -128,9 +137,9 @@ private PyZipImporter testPyZipImporterHelper(String archivePath, String archive // Check in the map that we got what we should assertEquals(2, map.size()); - String fooPath = Paths.get(archive,FOOKEY).toString() + SEP; // subdir gets SEP + String fooPath = Paths.get(archive, FOOKEY).toString() + SEP; // subdir gets SEP assertEquals(fooPath, map.get(FOOKEY).__getitem__(0).toString()); - String onePath = Paths.get(archive,ONEKEY).toString(); // file gets no SEP + String onePath = Paths.get(archive, ONEKEY).toString(); // file gets no SEP assertEquals(onePath, map.get(ONEKEY).__getitem__(0).toString()); return z; @@ -187,9 +196,43 @@ public void testZipimporter_get_source() { * Test method for * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_data(java.lang.String)}. */ + // Fails at present // @Test public void testZipimporter_get_data() { - fail("Not yet implemented"); + + // Compose a reference result (long-windedly: PyBytes(ByteBuffer) required!) + String ONE_TEXT = "attr = 'portion1 foo one'\n"; + ByteBuffer buf = Charset.forName("UTF-8").encode(ONE_TEXT); + byte[] bytes = new byte[buf.remaining()]; + buf.get(bytes); + PyBytes expected = new PyBytes(bytes); + + // Test with construction from base archive and access by path within + check_get_data(ARCHIVE, ONEKEY, expected); + + // Test with construction from base archive and access by path including archive + String onePath = Paths.get(ARCHIVE).resolve(Paths.get(ONEKEY)).toString(); + check_get_data(ARCHIVE, onePath, expected); + + // Test with construction from sub-directory archive and access by path within + String fooPath = Paths.get(ARCHIVE, FOOKEY).toString(); + check_get_data(fooPath, ONEKEY, expected); + + // Test with construction from sub-directory archive and access by path including archive + check_get_data(fooPath, onePath, expected); + + if (SEP != '/') { + // Test again, but with '/' in place of SEP in the path presented to get_data + check_get_data(ARCHIVE, unplatform(onePath), expected); + check_get_data(fooPath, unplatform(ONEKEY), expected); + } + } + + /** Helper for {@link #testZipimporter_get_data()} **/ + private void check_get_data(String archivePath, String name, PyBytes expected) { + PyZipImporter z = new PyZipImporter(archivePath); + PyBytes actual = (PyBytes) z.zipimporter_get_data(name); + assertEquals(expected, actual); } /** From 4d008f0b3804d91aa6c8e3d2a755dd4e7ec619ab Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Thu, 15 Jun 2017 21:44:06 +0100 Subject: [PATCH 5/8] Conform behaviour of zipimporter.get_data() to CPython and enable test. --- .../modules/zipimport/PyZipImporter.java | 72 +++++++++---------- .../modules/zipimport/ZipImportTest.java | 7 +- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index 8e1acfbf9..3fd59574b 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -1,6 +1,19 @@ /* Copyright (c) 2017 Jython Developers */ package org.python.modules.zipimport; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Enumeration; +import java.util.function.BiFunction; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + import org.python.Version; import org.python.core.ArgParser; import org.python.core.BuiltinDocs; @@ -24,18 +37,6 @@ import org.python.expose.ExposedNew; import org.python.expose.ExposedType; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Enumeration; -import java.util.function.BiFunction; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; - @ExposedType(name = "zipimport.zipimporter", doc = BuiltinDocs.zipimport_zipimporter_doc) public class PyZipImporter extends PyObject { @@ -235,31 +236,25 @@ public final PyObject zipimporter_get_source(String fullname) { * Note also that even where the platform file path separator differs from '/' (i.e. on * Windows), either that or '/' is acceptable in this context. * - * @param filename to the file within the archive + * @param path to the file within the archive * @return the contents */ @ExposedMethod - public final PyObject zipimporter_get_data(String filename) { + public final PyObject zipimporter_get_data(String path) { // XXX Possibly filename should be an object and if byte-like FS-decoded. - ZipFile zipFile = null; - if (filename.startsWith(archive)) { - filename = filename.substring(archive.length() + 1); + // The path may begin with the archive name as stored, in which case discard it. + if (toPlatformSeparator(path).startsWith(archive)) { + path = path.substring(archive.length() + 1); } - try { - zipFile = new ZipFile(new File(archive)); - ZipEntry zipEntry = zipFile.getEntry(prefix + filename); - if (zipEntry != null) { - return new PyBytes(FileUtil.readBytes(zipFile.getInputStream(zipEntry))); - } - throw ZipImportModule.ZipImportError(filename); - } catch (IOException e) { - throw ZipImportModule.ZipImportError(e.getMessage()); - } finally { - if (zipFile != null) { - try { - zipFile.close(); - } catch (IOException e) {} + try (ZipFile zipFile = new ZipFile(new File(archive))) { + // path is now definitely relative to the archive but may not use / as separator. + ZipEntry zipEntry = zipFile.getEntry(fromPlatformSeparator(path)); + if (zipEntry == null) { + throw new FileNotFoundException(path); } + return new PyBytes(FileUtil.readBytes(zipFile.getInputStream(zipEntry))); + } catch (IOException ioe) { + throw Py.IOError(ioe); } } @@ -330,10 +325,11 @@ final PyObject zipimporter_find_spec(PyObject[] args, String[] keywords) { * different. */ private static String toPlatformSeparator(String path) { - if (File.separatorChar == '/') { - return path; - } else { + // Cunningly avoid making a new String if possible. + if (File.separatorChar != '/' && path.contains("/")) { return path.replace('/', File.separatorChar); + } else { + return path; } } @@ -344,10 +340,11 @@ private static String toPlatformSeparator(String path) { * use '/' consistently internally. */ private static String fromPlatformSeparator(String path) { - if (File.separatorChar == '/') { - return path; - } else { + // Cunningly avoid making a new String if possible. + if (File.separatorChar != '/' && path.contains(File.separator)) { return path.replace(File.separatorChar, '/'); + } else { + return path; } } @@ -434,7 +431,6 @@ private static void readZipFile(ZipFile zipFile, PyObject files) { // Oh for Java 9 and Enumeration.asIterator() ZipEntry zipEntry = zipEntries.nextElement(); String name = toPlatformSeparator(zipEntry.getName()); - // XXX: Java zip file uses UTF-8 internally. Is there an encoding issue here? PyObject file = new PyUnicode(zipNameAndSep + name); PyObject compress = new PyLong(zipEntry.getMethod()); diff --git a/tests/java/org/python/modules/zipimport/ZipImportTest.java b/tests/java/org/python/modules/zipimport/ZipImportTest.java index e1c6d89de..9c9324db9 100644 --- a/tests/java/org/python/modules/zipimport/ZipImportTest.java +++ b/tests/java/org/python/modules/zipimport/ZipImportTest.java @@ -24,6 +24,10 @@ /** * Java level tests for {@link PyZipImporter}. If this module does not meet its expected behaviours, * Jython pretty much won't start, so a test independent of the interpreter seems a good idea. + * + * Python exceptions are not handled correctly in this test because the Python classes involved are + * not initialised. So where a PyException might have been raised, a Java NullPointerException tends + * to result. */ public class ZipImportTest { @@ -196,8 +200,7 @@ public void testZipimporter_get_source() { * Test method for * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_data(java.lang.String)}. */ - // Fails at present - // @Test + @Test public void testZipimporter_get_data() { // Compose a reference result (long-windedly: PyBytes(ByteBuffer) required!) From db133ffb93647ef86fea191a9d0558117fc2f66a Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Sat, 17 Jun 2017 16:23:58 +0100 Subject: [PATCH 6/8] Add comments (including Javadoc) to PyZipImporter and re-order tests. Also re-named some local members. The objective is (only) to be clear about the specification of the methods, how it works internally. Un-expose some methods that do not have CPython counterparts. All preparatory to systematic tests. --- .../modules/zipimport/PyZipImporter.java | 137 +++++++++++++++--- .../modules/zipimport/ZipImportTest.java | 106 +++++++------- 2 files changed, 163 insertions(+), 80 deletions(-) diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index 3fd59574b..10f9a6879 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -63,11 +63,11 @@ public class PyZipImporter extends PyObject { /** * Construct a PyZipImporter for the given path, which may include sub-directories - * within the ZIP file, for example path/to/archive.zip/a/sub/directory. Because - * equivalence between ZIP files and sub-directories in Python import (see + * within the ZIP file, for example path/to/archive.zip/a/sub/directory. Because of + * the equivalence between ZIP files and sub-directories in Python import (see * PEP 273), a * PyZipImporter operates using the platform-specific file separator ("\" on - * Windows) at all public interfaces, so on that platform a path like + * Windows) at all public interfaces. On Windows, a path like * path\to\archive.zip\a\sub\directory will normally be supplied. However, we * follow CPython in tolerating either "/" or the platform-specific file separator in the * archivePath, or indeed any mixture of the two. @@ -152,14 +152,14 @@ final static PyObject zipimporter_new(PyNewWrapper new_, boolean init, PyType su return new PyZipImporter(archivePath); } - @Override public String toString() { Path archivePath = Paths.get(archive, prefix); - return String.format("", archivePath); + return String.format("", archivePath); } - @ExposedMethod + /** There's no exec_module in CPython zipimporter. */ + @Deprecated // @ExposedMethod public final PyObject zipimporter_exec_module(PyObject module) { PyModule mod = (PyModule) module; String fullname = mod.__findattr__("__name__").asString(); @@ -167,11 +167,14 @@ public final PyObject zipimporter_exec_module(PyObject module) { } /** + * Load the module specified by fullname, |a fully qualified (dotted) module name. + * Return the imported module, or raise ZipImportError if it wasn’t found. + *

* CPython zipimport module is very outdated, it's not yet compliant with PEP-451, the specs are * checking the old behaviour this method and a few others that are deprecated a simply * implemented to satisfy the test suite * - * @param fullname + * @param fullname fully qualified (dotted) module name. * @return a python module */ @Deprecated @@ -181,12 +184,12 @@ public final PyObject zipimporter_load_module(String fullname) { PyModule mod = imp.addModule(fullname); imp.createFromCode(fullname, (PyCode) zipimporter_get_code(fullname)); String folder = archive + File.separator + prefix; - if (entry._package) { + if (entry.isPackage) { PyList pkgPath = new PyList(); pkgPath.append(new PyUnicode(folder + entry.dir(fullname))); mod.__setattr__("__path__", pkgPath); } - if (entry.binary) { + if (entry.isBinary) { mod.__setattr__("__cached__", new PyUnicode(folder + entry.path(fullname))); } mod.__setattr__("__file__", new PyUnicode(folder + entry.sourcePath(fullname))); @@ -194,18 +197,34 @@ public final PyObject zipimporter_load_module(String fullname) { }); } + /** + * Return True if the module specified by fullname is a package. Raise + * ZipImportError if the module couldn't be found. + * + * @param fullname fully qualified (dotted) module name. + * @return Python True if a package or False + */ @ExposedMethod public final PyObject zipimporter_is_package(String fullname) { - return getEntry(fullname, (entry, input) -> Py.newBoolean(entry._package)); + return getEntry(fullname, (entry, input) -> Py.newBoolean(entry.isPackage)); } + /** + * Return the source code for the specified module. Raise ZipImportError if the + * module couldn't be found, and return None if the archive does contain the module, but has no + * source for it. + * + * @param fullname fully qualified (dotted) module name. + * @return the source as a unicode + */ @ExposedMethod public final PyObject zipimporter_get_source(String fullname) { return getEntry(fullname, (entry, inputStream) -> { try { - if (entry.binary) { + if (entry.isBinary) { return Py.None; } + // FIXME: encoding? Is there a utility the compiler uses? return new PyUnicode(FileUtil.readBytes(inputStream)); } catch (IOException e) { throw ZipImportModule.ZipImportError(e.getMessage()); @@ -258,6 +277,14 @@ public final PyObject zipimporter_get_data(String path) { } } + /** + * Return the value __file__ would be set to if the specified module were imported. + * Raise ZipImportError if the module couldn't be found. + * + * @param fullname fully qualified (dotted) module name. + * @return file name as Python unicode + */ + // XXX Should the return be FS-encoded bytes? @ExposedMethod public final PyObject zipimporter_get_filename(String fullname) { return getEntry(fullname, (entry, inputStream) -> { @@ -265,13 +292,20 @@ public final PyObject zipimporter_get_filename(String fullname) { }); } + /** + * Return the code object for the specified module. Raise + * ZipImportError if the module couldn't be found. + * + * @param fullname fully qualified (dotted) module name. + * @return corresponding code object + */ @ExposedMethod public final PyObject zipimporter_get_code(String fullname) { try { long mtime = Files.getLastModifiedTime(new File(archive).toPath()).toMillis(); return getEntry(fullname, (entry, inputStream) -> { byte[] codeBytes; - if (entry.binary) { + if (entry.isBinary) { try { codeBytes = imp.readCode(fullname, inputStream, false, mtime); } catch (IOException ioe) { @@ -295,7 +329,17 @@ public final PyObject zipimporter_get_code(String fullname) { } } - @ExposedMethod + /** + * Find a spec for the specified module within the the archive. If a + * spec cannot be found, None is returned. When passed in, + * target is a module object that the finder may use to make a decision about what + * ModuleSpec to return. + *

+ * Disabled: find_spec is not implemented in zipimport.zipimporter in + * CPython 3.5 as far as we can tell, and by not having it exposed, we should get fall-back + * behaviour depending on find_module. + */ + @Deprecated // @ExposedMethod final PyObject zipimporter_find_spec(PyObject[] args, String[] keywords) { ArgParser ap = new ArgParser("find_spec", args, keywords, "fullname", "path", "target"); String fullname = ap.getString(0); @@ -305,13 +349,13 @@ final PyObject zipimporter_find_spec(PyObject[] args, String[] keywords) { PyObject spec = moduleSpec.__call__(new PyUnicode(fullname), this); return getEntry(fullname, (entry, inputStream) -> { String folder = archive + File.separatorChar + prefix; - if (entry._package) { + if (entry.isPackage) { PyList pkgpath = new PyList(); pkgpath.add(new PyUnicode(folder + entry.dir(fullname))); spec.__setattr__("submodule_search_locations", pkgpath); spec.__setattr__("is_package", Py.True); } - if (entry.binary) { + if (entry.isBinary) { spec.__setattr__("cached", new PyUnicode(folder + entry.path(fullname))); } spec.__setattr__("origin", new PyUnicode(folder + entry.sourcePath(fullname))); @@ -348,16 +392,29 @@ private static String fromPlatformSeparator(String path) { } } + /** + * Try to find a ZipEntry in this {@link #archive} corresponding to the given fully-qualified + * (dotted) module name, amongst the four search possibilities in order. For the first entry + * with a name that fits, open the content as a stream of bytes, and return the result of + * applying a supplied function to the ModuleEntry type constant and that stream. + * + * @param fullname fully qualified (dotted) module name. + * @param func to perform on first match + * @return return from application of func + */ private T getEntry(String fullname, BiFunction func) { ZipFile zipFile = null; try { zipFile = new ZipFile(new File(archive)); for (ModuleEntry entry : entries()) { + // FIXME (?) entry.path (nor prefix) contains no parent module names. + // FIXME ZipFile responds to paths with / not platform separator ZipEntry zipEntry = zipFile.getEntry(prefix + entry.path(fullname)); if (zipEntry != null) { return func.apply(entry, zipFile.getInputStream(zipEntry)); } } + // No name matches the module throw ZipImportModule.ZipImportError(fullname); } catch (IOException e) { throw ZipImportModule.ZipImportError(e.getMessage()); @@ -370,6 +427,12 @@ private T getEntry(String fullname, BiFunction } } + /** + * Return an array of four (newly-created) instances of nested class {@link ModuleEntry}, in the + * sequence: ModuleEntry(true, true), ModuleEntry(true, false), + * ModuleEntry(false, true), ModuleEntry(false, false). This is the + * order in which we try to identify something to load corresponding to a module name. + */ private ModuleEntry[] entries() { boolean[] options = {true, false}; ModuleEntry[] res = new ModuleEntry[4]; @@ -379,10 +442,12 @@ private ModuleEntry[] entries() { res[i++] = new ModuleEntry(pack, bin); } } - return res; } + /** + * Create a dictionary of the "files" within the archive at the given Path. + */ private static PyObject readDirectory(Path archive) { if (!Files.isReadable(archive)) { @@ -452,23 +517,36 @@ private static void readZipFile(ZipFile zipFile, PyObject files) { } } + /** + * A class having 4 possible values representing package-or-not and binary-or-not, which are the + * 4 ways to find the form of a module we can load. + */ class ModuleEntry { - private boolean _package; - private boolean binary; + private boolean isPackage; + private boolean isBinary; - ModuleEntry(boolean pack, boolean bin) { - _package = pack; - binary = bin; + ModuleEntry(boolean isPackage, boolean isBinary) { + this.isPackage = isPackage; + this.isBinary = isBinary; } + /** + * Given a simple module name, or the (last element of a dotted) module name, into a file + * name for the corresponding binary or source, calculate what file we're looking for, + * according to the settings of isPackage or isBinary. + * + * @param fullname fully qualified (dotted) module name (e.g. "path.to.mymodule") + * @return one of the four strings mymodule(/__init__|).(py|class) + */ String path(String name) { StringBuilder res = new StringBuilder(); + // Append the last element of the module res.append(name.substring(name.lastIndexOf('.') + 1)); - if (_package) { + if (isPackage) { res.append(File.separatorChar + "__init__"); } - if (binary) { + if (isBinary) { res.append(".class"); } else { res.append(".py"); @@ -476,12 +554,23 @@ String path(String name) { return res.toString(); } + /** + * Directory of package or "" if not this.isPackage. + * + * @param fullname fully qualified (dotted) module name (e.g. "path.to.mymodule") + * @return "mymodule" or "" + */ String dir(String name) { return path(name).replaceFirst("/__init__\\.(py|class)$", ""); } + /** + * Return the same as {@link #path(String)} but as if isBinary==false. + * + * @return "module/__init__.py" or "module.py". + */ String sourcePath(String name) { - return new ModuleEntry(_package, false).path(name); + return new ModuleEntry(isPackage, false).path(name); } } } diff --git a/tests/java/org/python/modules/zipimport/ZipImportTest.java b/tests/java/org/python/modules/zipimport/ZipImportTest.java index 9c9324db9..29d3558b9 100644 --- a/tests/java/org/python/modules/zipimport/ZipImportTest.java +++ b/tests/java/org/python/modules/zipimport/ZipImportTest.java @@ -161,39 +161,30 @@ private Map mapFromFiles(PyObject files) { } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_exec_module(org.python.core.PyObject)}. + * Test method for {@link java.lang.Object#toString()}. */ - // @Test - public void testZipimporter_exec_module() { - fail("Not yet implemented"); - } + @Test + public void test__repr__() { - /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_load_module(java.lang.String)}. - */ - // @Test - public void testZipimporter_load_module() { - fail("Not yet implemented"); - } + // Test relative path + PyZipImporter z = new PyZipImporter(ARCHIVE); + String expected = String.format("", ARCHIVE); + PyUnicode actual = z.__repr__(); + assertEquals(expected, actual.getString()); - /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_is_package(java.lang.String)}. - */ - // @Test - public void testZipimporter_is_package() { - fail("Not yet implemented"); - } + // Test absolute path + String abspath = Paths.get(ARCHIVE).toAbsolutePath().toString(); + z = new PyZipImporter(abspath); + expected = String.format("", abspath); + actual = z.__repr__(); + assertEquals(expected, actual.getString()); - /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_source(java.lang.String)}. - */ - // @Test - public void testZipimporter_get_source() { - fail("Not yet implemented"); + // Test path with subfolder + String fooPath = Paths.get(ARCHIVE, FOOKEY).toString(); // Strips trailing SEP in FOOKEY + z = new PyZipImporter(fooPath); + expected = String.format("", fooPath); + actual = z.__repr__(); + assertEquals(expected, actual.getString()); } /** @@ -240,56 +231,59 @@ private void check_get_data(String archivePath, String name, PyBytes expected) { /** * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_filename(java.lang.String)}. + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_load_module(java.lang.String)}. */ // @Test - public void testZipimporter_get_filename() { + public void testZipimporter_load_module() { fail("Not yet implemented"); } /** * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_code(java.lang.String)}. + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_is_package(java.lang.String)}. */ // @Test - public void testZipimporter_get_code() { + public void testZipimporter_is_package() { fail("Not yet implemented"); } /** * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_find_spec(org.python.core.PyObject[], java.lang.String[])}. + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_source(java.lang.String)}. */ // @Test - public void testZipimporter_find_spec() { + public void testZipimporter_get_source() { fail("Not yet implemented"); } /** - * Test method for {@link java.lang.Object#toString()}. + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_filename(java.lang.String)}. */ - @Test - public void test__repr__() { - - // Test relative path - PyZipImporter z = new PyZipImporter(ARCHIVE); - String expected = String.format("", ARCHIVE); - PyUnicode actual = z.__repr__(); - assertEquals(expected, actual.getString()); + // @Test + public void testZipimporter_get_filename() { + fail("Not yet implemented"); + } - // Test absolute path - String abspath = Paths.get(ARCHIVE).toAbsolutePath().toString(); - z = new PyZipImporter(abspath); - expected = String.format("", abspath); - actual = z.__repr__(); - assertEquals(expected, actual.getString()); + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_code(java.lang.String)}. + */ + // @Test + public void testZipimporter_get_code() { + fail("Not yet implemented"); + } - // Test path with subfolder - String fooPath = Paths.get(ARCHIVE, FOOKEY).toString(); // Strips trailing SEP in FOOKEY - z = new PyZipImporter(fooPath); - expected = String.format("", fooPath); - actual = z.__repr__(); - assertEquals(expected, actual.getString()); + /** + * Test method for + * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_find_spec(org.python.core.PyObject[], java.lang.String[])}. + * + * Disabled test. find_spec is not implemented in zipimport.zipimporter in CPython 3.5 as far as we can tell, + * and by not having it exposed, we should get fall-back behaviour depending on find_module. + */ + // @Test + public void testZipimporter_find_spec() { + //fail("Not yet implemented"); } } From b0431b1661dfd4ff0154578d975ca2057ef1cd7a Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Sat, 17 Jun 2017 20:18:16 +0100 Subject: [PATCH 7/8] Have PyZipImportTest create its own test material. --- .../modules/zipimport/ZipImportTest.java | 120 +++++++++++++++--- 1 file changed, 101 insertions(+), 19 deletions(-) diff --git a/tests/java/org/python/modules/zipimport/ZipImportTest.java b/tests/java/org/python/modules/zipimport/ZipImportTest.java index 29d3558b9..ad49fdb1c 100644 --- a/tests/java/org/python/modules/zipimport/ZipImportTest.java +++ b/tests/java/org/python/modules/zipimport/ZipImportTest.java @@ -4,16 +4,26 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; +import java.io.BufferedOutputStream; import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; import java.nio.ByteBuffer; +import java.nio.channels.Channels; import java.nio.charset.Charset; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import org.junit.After; +import org.junit.AfterClass; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.python.core.PyBytes; import org.python.core.PyDictionary; @@ -35,26 +45,88 @@ public class ZipImportTest { static final char SEP = File.separatorChar; /** In the standard library is a ZIP file at this (Unix-style) location: */ - static final String ARCHIVE = - platform("dist/Lib/test/test_importlib/namespace_pkgs/top_level_portion1.zip"); + static final String ARCHIVE = platform("tmpdir_ZipImportTest/sub/test.zip"); + static final Path ARCHIVE_PATH = Paths.get(ARCHIVE); + + /** The file structure in the zip. Separators are '/', irrespective of platform. */ + // @formatter:off + static final String[] STRUCTURE = { + "sub/dir/foo/", // FOO + "sub/dir/foo/x/one.py", // ONE + "a/b/c/", + "a/b/c/__init__.py", + "a/b/c/two.py", + "a/b/three.py", + "four.py" + }; + static final int FOO = 0, ONE = 1; // Where they are in STRUCTURE + // @formatter:on // (Relative) paths of sub-directories to the ZIP file and files within them. - static final String FOOKEY = platform("foo/"); - static final String ONEKEY = platform("foo/one.py"); + static final String FOOKEY = platform(STRUCTURE[FOO]); + static final String ONEKEY = platform(STRUCTURE[ONE]); - @Before - public void setUp() throws Exception {} + static final Charset UTF8 = Charset.forName("UTF-8"); - /** Swap '/' for platform file name separator if different. */ - private static String platform(String path) { - return (SEP == '/') ? path : path.replace('/', SEP); + /** The file contents in the zip. */ + static final String[] CONTENT = new String[STRUCTURE.length]; + + @BeforeClass + public static void createTestZip() throws IOException { + + // Ensure directories exist to the archive location + if (ARCHIVE_PATH.getNameCount() > 1) { + Files.createDirectories(ARCHIVE_PATH.getParent()); + } + + // Create (or overwrite) the raw archive file itself + OutputStream out = new BufferedOutputStream(Files.newOutputStream(ARCHIVE_PATH)); + + // Wrap it so there are two inputs: one for ZipEntry objects and one for text to encode + try (ZipOutputStream zip = new ZipOutputStream(out); + Writer text = Channels.newWriter(Channels.newChannel(zip), "UTF-8")) { + + // Make an entry for each path mentioned in STRUCTURE + for (int i = 0; i < STRUCTURE.length; i++) { + + // The structure table gives us names for the entries + String name = STRUCTURE[i]; + ZipEntry entry = new ZipEntry(name); + zip.putNextEntry(entry); + + // And we make up some content like this ... + String content = null; + if (name.endsWith("/")) { + // Directory entry: no content + // XXX What happens about intermediate directories? + } else { + if (name.endsWith(".py")) { + // Content is Python source + content = String.format("# %s\n", name); + } else { + content = String.format("Contents of %s\n", name); + } + CONTENT[i] = content; + text.write(content); + text.flush(); + } + } + } } - /** Swap platform file name separator for '/' if different. */ - private static String unplatform(String path) { - return (SEP == '/') ? path : path.replace(SEP, '/'); + @AfterClass + public static void deleteTestZip() { + try { + // Useful to look at from Python! + Files.deleteIfExists(ARCHIVE_PATH); + } catch (IOException e) { + // Meh! + } } + @Before + public void setUp() throws Exception {} + @After public void tearDown() throws Exception { // Empty the cache for a clean start next time @@ -64,6 +136,16 @@ public void tearDown() throws Exception { } } + /** Swap '/' for platform file name separator if different. */ + private static String platform(String path) { + return (SEP == '/') ? path : path.replace('/', SEP); + } + + /** Swap platform file name separator for '/' if different. */ + private static String unplatform(String path) { + return (SEP == '/') ? path : path.replace(SEP, '/'); + } + /** * Test method for * {@link org.python.modules.zipimport.PyZipImporter#PyZipImporter(java.lang.String)} where the @@ -96,7 +178,7 @@ private void testPyZipImporterHelper(String archive) { // Test where the archive path is just the zip file location PyZipImporter za = testPyZipImporterHelper(archive, archive, ""); - // Test where the archive path is extended with a sub-directory within the zip + // Test where the archive path is extended with a sub-directory "foo" within the zip String fooPath = Paths.get(archive, FOOKEY).toString(); PyZipImporter zf = testPyZipImporterHelper(fooPath, archive, FOOKEY); assertEquals("cache entry not re-used", za.files, zf.files); @@ -140,7 +222,7 @@ private PyZipImporter testPyZipImporterHelper(String archivePath, String archive Map map = mapFromFiles(z.files); // Check in the map that we got what we should - assertEquals(2, map.size()); + assertEquals(STRUCTURE.length, map.size()); String fooPath = Paths.get(archive, FOOKEY).toString() + SEP; // subdir gets SEP assertEquals(fooPath, map.get(FOOKEY).__getitem__(0).toString()); String onePath = Paths.get(archive, ONEKEY).toString(); // file gets no SEP @@ -195,8 +277,7 @@ public void test__repr__() { public void testZipimporter_get_data() { // Compose a reference result (long-windedly: PyBytes(ByteBuffer) required!) - String ONE_TEXT = "attr = 'portion1 foo one'\n"; - ByteBuffer buf = Charset.forName("UTF-8").encode(ONE_TEXT); + ByteBuffer buf = UTF8.encode(CONTENT[ONE]); byte[] bytes = new byte[buf.remaining()]; buf.get(bytes); PyBytes expected = new PyBytes(bytes); @@ -278,12 +359,13 @@ public void testZipimporter_get_code() { * Test method for * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_find_spec(org.python.core.PyObject[], java.lang.String[])}. * - * Disabled test. find_spec is not implemented in zipimport.zipimporter in CPython 3.5 as far as we can tell, - * and by not having it exposed, we should get fall-back behaviour depending on find_module. + * Disabled test. find_spec is not implemented in + * zipimport.zipimporter in CPython 3.5 as far as we can tell, and by not having it + * exposed, we should get fall-back behaviour depending on find_module. */ // @Test public void testZipimporter_find_spec() { - //fail("Not yet implemented"); + // fail("Not yet implemented"); } } From 0b14ff2517fd1bb727a9978b12442223bf9f830f Mon Sep 17 00:00:00 2001 From: Jeff Allen Date: Mon, 17 Jul 2017 22:43:02 +0100 Subject: [PATCH 8/8] Corrections to zipimporter.get_filename and a test. Test against careful observation of equivalent behaviour in CPython, when a .py file name should be returned and when a .class. --- .../modules/zipimport/PyZipImporter.java | 28 +- .../modules/zipimport/ZipImportTest.java | 367 +++++++++++++++--- 2 files changed, 317 insertions(+), 78 deletions(-) diff --git a/src/org/python/modules/zipimport/PyZipImporter.java b/src/org/python/modules/zipimport/PyZipImporter.java index 10f9a6879..7a817ccb2 100644 --- a/src/org/python/modules/zipimport/PyZipImporter.java +++ b/src/org/python/modules/zipimport/PyZipImporter.java @@ -215,7 +215,7 @@ public final PyObject zipimporter_is_package(String fullname) { * source for it. * * @param fullname fully qualified (dotted) module name. - * @return the source as a unicode + * @return the source as a str */ @ExposedMethod public final PyObject zipimporter_get_source(String fullname) { @@ -282,12 +282,12 @@ public final PyObject zipimporter_get_data(String path) { * Raise ZipImportError if the module couldn't be found. * * @param fullname fully qualified (dotted) module name. - * @return file name as Python unicode + * @return file name as Python str */ - // XXX Should the return be FS-encoded bytes? @ExposedMethod public final PyObject zipimporter_get_filename(String fullname) { return getEntry(fullname, (entry, inputStream) -> { + // FIXME: this is the name in theory, but what if it isn't present in the ZIP? return new PyUnicode(archive + File.separator + prefix + entry.sourcePath(fullname)); }); } @@ -407,9 +407,8 @@ private T getEntry(String fullname, BiFunction try { zipFile = new ZipFile(new File(archive)); for (ModuleEntry entry : entries()) { - // FIXME (?) entry.path (nor prefix) contains no parent module names. - // FIXME ZipFile responds to paths with / not platform separator - ZipEntry zipEntry = zipFile.getEntry(prefix + entry.path(fullname)); + // ZipFile responds to paths with / not platform separator + ZipEntry zipEntry = zipFile.getEntry(fromPlatformSeparator(prefix) + entry.path(fullname)); if (zipEntry != null) { return func.apply(entry, zipFile.getInputStream(zipEntry)); } @@ -532,19 +531,18 @@ class ModuleEntry { } /** - * Given a simple module name, or the (last element of a dotted) module name, into a file - * name for the corresponding binary or source, calculate what file we're looking for, + * Given a simple or fully-qualified (dotted) module name, take the last element as the + * name for the corresponding binary or source, and calculate what file we're looking for, * according to the settings of isPackage or isBinary. * * @param fullname fully qualified (dotted) module name (e.g. "path.to.mymodule") * @return one of the four strings mymodule(/__init__|).(py|class) */ String path(String name) { - StringBuilder res = new StringBuilder(); - // Append the last element of the module - res.append(name.substring(name.lastIndexOf('.') + 1)); + // Start with the last element of the module + StringBuilder res = new StringBuilder(name.substring(name.lastIndexOf('.') + 1)); if (isPackage) { - res.append(File.separatorChar + "__init__"); + res.append("/__init__"); } if (isBinary) { res.append(".class"); @@ -565,12 +563,14 @@ String dir(String name) { } /** - * Return the same as {@link #path(String)} but as if isBinary==false. + * Return the same as {@link #path(String)} but as if isBinary==false, + * so we have the (expected) file name attribute of the module. * * @return "module/__init__.py" or "module.py". */ String sourcePath(String name) { - return new ModuleEntry(isPackage, false).path(name); + String file = new ModuleEntry(isPackage, false).path(name); + return toPlatformSeparator(file); } } } diff --git a/tests/java/org/python/modules/zipimport/ZipImportTest.java b/tests/java/org/python/modules/zipimport/ZipImportTest.java index ad49fdb1c..3265c7481 100644 --- a/tests/java/org/python/modules/zipimport/ZipImportTest.java +++ b/tests/java/org/python/modules/zipimport/ZipImportTest.java @@ -1,6 +1,8 @@ /* Copyright (c) 2017 Jython Developers */ package org.python.modules.zipimport; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -8,13 +10,12 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; -import java.io.Writer; import java.nio.ByteBuffer; -import java.nio.channels.Channels; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Instant; import java.util.HashMap; import java.util.Map; import java.util.zip.ZipEntry; @@ -51,25 +52,62 @@ public class ZipImportTest { /** The file structure in the zip. Separators are '/', irrespective of platform. */ // @formatter:off static final String[] STRUCTURE = { + // Used in testZipimporter_get_data "sub/dir/foo/", // FOO "sub/dir/foo/x/one.py", // ONE - "a/b/c/", - "a/b/c/__init__.py", - "a/b/c/two.py", - "a/b/three.py", - "four.py" + "sub/dir/foo/two.py", // TWO two is a module in sub/dir/foo + // A library whose path is "...test.zip" + "b/__init__.py", // b is a package + "b/three.py", // three is a module in b + "b/c/", + "b/c/__init__.py", // b.c is a package + "b/c/__init__.pyc", // should be preferred to __init__.py + "b/c/four.py", + "b/c/four.pyc", // should be preferred to four.py + "b/c/five.pyc", + "b/d/__init__.pyc", // b.d is a package (even though no __init__.py) + // No "b/d/__init__.py", + // A library whose path is "...test.zip/lib/a" + "lib/a/b/__init__.py", // b is a package + "lib/a/b/three.py", // three is a module in b + "lib/a/b/c/", + "lib/a/b/c/__init__.py", // b.c is a package + "lib/a/b/c/__init__.pyc", // should be preferred to __init__.py + "lib/a/b/c/four.py", + "lib/a/b/c/four.pyc", // should be preferred to four.py + "lib/a/b/c/five.pyc", + "lib/a/b/d/__init__.pyc", // b.d is a package (even though no __init__.py) + // No "lib/a/b/d/__init__.py", }; - static final int FOO = 0, ONE = 1; // Where they are in STRUCTURE + + static final int FOO = 0, ONE = 1, TWO = 2; // Where they are in STRUCTURE // @formatter:on - // (Relative) paths of sub-directories to the ZIP file and files within them. + /** (Relative) path of directory "foo" using platform separator. */ static final String FOOKEY = platform(STRUCTURE[FOO]); + /** (Relative) path of file "one.py" using platform separator. */ static final String ONEKEY = platform(STRUCTURE[ONE]); static final Charset UTF8 = Charset.forName("UTF-8"); - /** The file contents in the zip. */ - static final String[] CONTENT = new String[STRUCTURE.length]; + /** A time later than any source file, used on .class entries. */ + static final long LATER = Instant.parse("2038-01-01T00:00:00Z").toEpochMilli(); + + /** Map from file name to the binary contents in the zip entry. */ + private static final Map BINARY = new HashMap<>(STRUCTURE.length); + + /** Empty CPython module. Date 1 Jan 2038 to be later than the .py */ + static final byte[] EMPTY_MODULE = {0x16, 0xd, 0xd, 0xa, // magic number: CPython pythonrun.c + -128, 23, -24, 127, 0, 0, 0, 0, -29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 64, + 0, 0, 0, 115, 4, 0, 0, 0, 100, 0, 0, 83, 41, 1, 78, -87, 0, 114, 1, 0, 0, 0, 114, 1, 0, + 0, 0, 114, 1, 0, 0, 0, -6, 7, 60, 101, 109, 112, 116, 121, 62, -38, 8, 60, 109, 111, + 100, 117, 108, 101, 62, 1, 0, 0, 0, 115, 0, 0, 0, 0}; + + /** Empty .class (don't know what to write and don't care in these tests). */ + static final byte[] EMPTY_CLASS = {1, 2, 3, 4}; + + /** Number of zip entries created. */ + static int zipEntryCount = 0; @BeforeClass public static void createTestZip() throws IOException { @@ -83,45 +121,99 @@ public static void createTestZip() throws IOException { OutputStream out = new BufferedOutputStream(Files.newOutputStream(ARCHIVE_PATH)); // Wrap it so there are two inputs: one for ZipEntry objects and one for text to encode - try (ZipOutputStream zip = new ZipOutputStream(out); - Writer text = Channels.newWriter(Channels.newChannel(zip), "UTF-8")) { + try (ZipOutputStream zip = new ZipOutputStream(out)) { // Make an entry for each path mentioned in STRUCTURE for (int i = 0; i < STRUCTURE.length; i++) { // The structure table gives us names for the entries String name = STRUCTURE[i]; - ZipEntry entry = new ZipEntry(name); - zip.putNextEntry(entry); - // And we make up some content like this ... - String content = null; if (name.endsWith("/")) { // Directory entry: no content - // XXX What happens about intermediate directories? + zip.putNextEntry(new ZipEntry(name)); + zip.closeEntry(); + + } else if (name.endsWith(".pyc")) { + // Reduce to module name + createCompiledZipEntry(zip, name.substring(0, name.length() - 4)); + zipEntryCount += 1; + + } else if (name.endsWith(".py")) { + // Content is Python source + createTestZipEntry(zip, name, String.format("# %s\n", name)); + } else { - if (name.endsWith(".py")) { - // Content is Python source - content = String.format("# %s\n", name); - } else { - content = String.format("Contents of %s\n", name); - } - CONTENT[i] = content; - text.write(content); - text.flush(); + // Any other kind of file (just in case) + createTestZipEntry(zip, name, String.format("Contents of %s\n", name)); } + + zipEntryCount += 1; } } } + /** + * Helper to {@link #createTestZip()} creating two entries with binary content: one . + * + * @param zip file to write + * @param name of relative zip entry (a '/'-path) + * @throws IOException + */ + private static void createCompiledZipEntry(ZipOutputStream zip, String name) + throws IOException { + // Ensure time stamp on the binary is a long time in the future. + createTestZipEntry(zip, name + ".class", LATER, EMPTY_CLASS); + // But also make a valid .pyc file (for behavioural comparison with CPython) + createTestZipEntry(zip, name + ".pyc", LATER, EMPTY_MODULE); + } + + /** + * Helper to {@link #createTestZip()} creating one entry with binary content. + * + * @param zip file to write + * @param name of relative zip entry (a '/'-path) + * @param time in milliseconds since epoch for timestamp (use current time if <0) + * @param content to write in the entry + * @throws IOException + */ + private static void createTestZipEntry(ZipOutputStream zip, String name, long time, + byte[] content) throws IOException { + ZipEntry entry = new ZipEntry(name); + if (time >= 0L) { + entry.setTime(time); + } + zip.putNextEntry(entry); + zip.write(content); + BINARY.put(name, content); + zip.closeEntry(); + } + + /** + * Helper to {@link #createTestZip()} creating one entry with text content. + * + * @param zip file to write + * @param name of relative zip entry (a '/'-path) + * @param content to write in the entry + * @throws IOException + */ + private static void createTestZipEntry(ZipOutputStream zip, String name, String content) + throws IOException { + ByteBuffer buf = UTF8.encode(content); + byte[] bytes = new byte[buf.remaining()]; + buf.get(bytes); + long time = System.currentTimeMillis(); + createTestZipEntry(zip, name, time, bytes); + } + @AfterClass public static void deleteTestZip() { - try { - // Useful to look at from Python! - Files.deleteIfExists(ARCHIVE_PATH); - } catch (IOException e) { - // Meh! - } + // Useful to look at from Python! + // try { + // Files.deleteIfExists(ARCHIVE_PATH); + // } catch (IOException e) { + // Meh! + // } } @Before @@ -147,9 +239,8 @@ private static String unplatform(String path) { } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#PyZipImporter(java.lang.String)} where the - * archive path is relative to the working directory. + * Test method for {@link PyZipImporter#PyZipImporter(java.lang.String)} where the archive path + * is relative to the working directory. */ @Test public void testPyZipImporterRelative() { @@ -157,9 +248,8 @@ public void testPyZipImporterRelative() { } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#PyZipImporter(java.lang.String)}. where the - * archive path is absolute. + * Test method for {@link PyZipImporter#PyZipImporter(java.lang.String)}. where the archive path + * is absolute. */ @Test public void testPyZipImporterAbsolute() { @@ -222,7 +312,7 @@ private PyZipImporter testPyZipImporterHelper(String archivePath, String archive Map map = mapFromFiles(z.files); // Check in the map that we got what we should - assertEquals(STRUCTURE.length, map.size()); + assertEquals(zipEntryCount, map.size()); String fooPath = Paths.get(archive, FOOKEY).toString() + SEP; // subdir gets SEP assertEquals(fooPath, map.get(FOOKEY).__getitem__(0).toString()); String onePath = Paths.get(archive, ONEKEY).toString(); // file gets no SEP @@ -270,17 +360,13 @@ public void test__repr__() { } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_data(java.lang.String)}. + * Test method for {@link PyZipImporter#zipimporter_get_data(java.lang.String)}. */ @Test public void testZipimporter_get_data() { - // Compose a reference result (long-windedly: PyBytes(ByteBuffer) required!) - ByteBuffer buf = UTF8.encode(CONTENT[ONE]); - byte[] bytes = new byte[buf.remaining()]; - buf.get(bytes); - PyBytes expected = new PyBytes(bytes); + // Get the reference result + PyBytes expected = new PyBytes(BINARY.get(STRUCTURE[ONE])); // Test with construction from base archive and access by path within check_get_data(ARCHIVE, ONEKEY, expected); @@ -310,45 +396,198 @@ private void check_get_data(String archivePath, String name, PyBytes expected) { assertEquals(expected, actual); } + /** Class describing one entry in the ZIP file, parsed to various useful paths and elements. */ + static class EntrySplit { + + /** Path from root in the ZIP to the current module (Javanese: relative '/'-path). */ + final String parent; + /** Name of the current module. */ + final String name; + /** Extension (e.g. ".py") represented by the specification we split. */ + final String ext; + /** Whether the current module is a package. */ + final boolean isPackage; + /** Path that would be used by the import protocol to create the PyZipImporter. */ + final Path archivePath; + /** File path to and through the ZIP to the current module. */ + final Path filePath; + + /** Parse the path string into the parts needed. */ + EntrySplit(String entry) { + + // Split the entry into a path in the archive, name and extension. + int slash = entry.lastIndexOf('/'); + String parent = entry.substring(0, slash + 1); // ends with '/' or is "" = no '/' + int dot = entry.lastIndexOf('.'); + String name, ext; + if (dot < slash) { + dot = entry.length(); + } + name = entry.substring(slash + 1, dot); // e.g. "three" or "__init__" + ext = entry.substring(dot); // e.g. ".py" or ".class" or "" = no dot + + isPackage = "__init__".equals(name); + + if (isPackage) { + if (slash < 1) { + // Pretty sure it is not valid to have __init__ in the root. Blame the tools. + throw new IllegalArgumentException("Invalid entry: __init__ in root."); + } + /* + * Move everything up one in the hierarchy. The package name is at the end of the + * parent string and the zipimporter to use is the one constructed for the parent of + * that. + */ + slash = parent.lastIndexOf('/', slash - 1); + name = parent.substring(slash + 1, parent.length() - 1); + parent = parent.substring(0, slash + 1); + } + this.parent = parent; + this.name = name; + this.ext = ext; + + // Construct a path to the required place inside the archive + StringBuilder path = new StringBuilder(ARCHIVE); + path.append(SEP).append(platform(parent)); + this.archivePath = Paths.get(path.toString()); + + // The file path is estimated (since a binary might exist) & depends on package-ness. + path.append(name); + if (isPackage) { + path.append(SEP).append("__init__"); + } + path.append(ext); + this.filePath = Paths.get(path.toString()); + } + } + + /** Split a filename at the last '.', as long as it is not before the last separator. */ + private static int dotIndex(String entry, char sep) { + int slash = entry.lastIndexOf(sep); + int dot = entry.lastIndexOf('.'); + if (dot <= slash) { + dot = entry.length(); + } + return dot; + } + /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_load_module(java.lang.String)}. + * Test method for {@link PyZipImporter#zipimporter_get_filename(java.lang.String)}. Given this + * list of entries in the archive: + * + *

+     * >>> from zipimport import zipimporter as zi
+     * >>> archive = r'tmpdir_ZipImportTest\sub\test.zip'
+     * >>> z = zi(archive)
+     * >>> for n in sorted(z._files.keys()): print(n, z._files[n][5:7]) # (time, date)
+     * ...
+     * b\__init__.py (44545, 19185)
+     * b\c\ (44545, 19185)
+     * b\c\__init__.class (0, 29729)
+     * b\c\__init__.py (44545, 19185)
+     * b\c\__init__.pyc (0, 29729)
+     * b\c\five.class (0, 29729)
+     * b\c\five.pyc (0, 29729)
+     * b\c\four.class (0, 29729)
+     * b\c\four.py (44545, 19185)
+     * b\c\four.pyc (0, 29729)
+     * b\d\__init__.class (0, 29729)
+     * b\d\__init__.pyc (0, 29729)
+     * b\three.py (44545, 19185)
+     * 
CPython behaviour is:
+     * >>> z.get_filename('b')
+     * 'tmpdir_ZipImportTest\\sub\\test.zip\\b\\__init__.py'
+     * >>> zb = zi(archive + '/b')
+     * >>> zb.get_filename('c')
+     * 'tmpdir_ZipImportTest\\sub\\test.zip\\b\\c\\__init__.py'
+     * >>> zb.get_filename('d')
+     * 'tmpdir_ZipImportTest\\sub\\test.zip\\b\\d\\__init__.pyc'
+     * >>> zc = zi(archive + '/b/c')
+     * >>> zc.get_filename('four')
+     * 'tmpdir_ZipImportTest\\sub\\test.zip\\b\\c\\four.py'
+     * >>> zc.get_filename('five')
+     * 'tmpdir_ZipImportTest\\sub\\test.zip\\b\\c\\five.pyc'
+     * 
That is, we report the .py file name if there is one and the + * .pyc file name if the .py is missing (but in Jython, the + * .class file should be expected). */ - // @Test - public void testZipimporter_load_module() { - fail("Not yet implemented"); + @Test + public void testZipimporter_get_filename() { + // Test with each of the entries that is not just a folder + for (String entry : STRUCTURE) { + if (entry.endsWith(".py")) { + // Parse the entry string into the parts we need and test + EntrySplit split = new EntrySplit(entry); + check_get_filename(split); + } else if (entry.endsWith(".class")) { + // In this case, if a .py exists, we should be given that instead + String pyRelativePath = entry.substring(entry.length() - 6) + ".py"; + if (BINARY.containsKey(pyRelativePath)) { + entry = pyRelativePath; + } + // Parse the entry string into the parts we need and test + EntrySplit split = new EntrySplit(entry); + check_get_filename(split); + } + } } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_is_package(java.lang.String)}. + * When we ask for the module indicated by each the {@link #STRUCTURE} entry, we should get as a + * file name a string consistent with the entry, allowing for the several ways to satisfy the + * request, and for the use of the platform {@link File#separatorChar}. + * + * @param split derived from a non-directory entry in {@link #STRUCTURE}. + */ + private static void check_get_filename(EntrySplit split) { + // Make a PyZipImporter for this entry (parent of module or package) + PyZipImporter z = new PyZipImporter(split.archivePath.toString()); + + // The file path in the split tells us What we should have, except for the extension. + String expected = split.filePath.toString(); + + // Only the last element ought to be used. + String target = "ignore.me." + split.name; + PyObject filenameObject = z.zipimporter_get_filename(target); + + if (filenameObject instanceof PyUnicode) { + // Compare with expected value and actual, but only up to the dot + String filename = ((PyUnicode) filenameObject).getString(); + int dot = dotIndex(filename, SEP); + assertThat("base file path", filename.substring(0, dot), + equalTo(expected.substring(0, dotIndex(expected, SEP)))); + assertThat("file path for " + target, filename, equalTo(expected)); + } else { + fail("get_filename() result not a str object"); + } + } + + /** + * Test method for {@link PyZipImporter#zipimporter_load_module(java.lang.String)}. */ // @Test - public void testZipimporter_is_package() { + public void testZipimporter_load_module() { fail("Not yet implemented"); } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_source(java.lang.String)}. + * Test method for {@link PyZipImporter#zipimporter_is_package(java.lang.String)}. */ // @Test - public void testZipimporter_get_source() { + public void testZipimporter_is_package() { fail("Not yet implemented"); } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_filename(java.lang.String)}. + * Test method for {@link PyZipImporter#zipimporter_get_source(java.lang.String)}. */ // @Test - public void testZipimporter_get_filename() { + public void testZipimporter_get_source() { fail("Not yet implemented"); } /** - * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_get_code(java.lang.String)}. + * Test method for {@link PyZipImporter#zipimporter_get_code(java.lang.String)}. */ // @Test public void testZipimporter_get_code() { @@ -357,7 +596,7 @@ public void testZipimporter_get_code() { /** * Test method for - * {@link org.python.modules.zipimport.PyZipImporter#zipimporter_find_spec(org.python.core.PyObject[], java.lang.String[])}. + * {@link PyZipImporter#zipimporter_find_spec(org.python.core.PyObject[], java.lang.String[])}. * * Disabled test. find_spec is not implemented in * zipimport.zipimporter in CPython 3.5 as far as we can tell, and by not having it