From df429ca54806bef5bc5f99a30bd7f8ab29413500 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 18 Aug 2024 22:51:07 +0200 Subject: [PATCH] move docs for _osmium module to pyi files This gives us better control over the output and is more easy to maintain. --- lib/file_iterator.cc | 6 +- lib/merge_input_reader.cc | 22 +--- lib/node_location_handler.cc | 4 +- lib/osmium.cc | 26 ++--- lib/simple_writer.cc | 39 +------ src/osmium/_osmium.pyi | 210 ++++++++++++++++++++++++++++++----- src/osmium/file_processor.py | 91 +++++++++++---- src/osmium/helper.py | 12 -- 8 files changed, 271 insertions(+), 139 deletions(-) diff --git a/lib/file_iterator.cc b/lib/file_iterator.cc index 03ff4d1e..045de77f 100644 --- a/lib/file_iterator.cc +++ b/lib/file_iterator.cc @@ -139,8 +139,7 @@ namespace pyosmium { void init_osm_file_iterator(py::module &m) { - py::class_(m, "OsmFileIterator", - "Iterator interface for reading an OSM file.") + py::class_(m, "OsmFileIterator") .def(py::init(), py::keep_alive<0, 1>()) .def("set_filtered_handler", &OsmFileIterator::set_filtered_handler, @@ -148,8 +147,7 @@ void init_osm_file_iterator(py::module &m) .def("set_filtered_handler", &OsmFileIterator::set_filtered_python_handler, py::keep_alive<0, 1>()) .def("__iter__", [](py::object const &self) { return self; }) - .def("__next__", &OsmFileIterator::next, - "Get the next OSM object from the file or raise a StopIteration.") + .def("__next__", &OsmFileIterator::next) ; } diff --git a/lib/merge_input_reader.cc b/lib/merge_input_reader.cc index 7ed30617..52cd7220 100644 --- a/lib/merge_input_reader.cc +++ b/lib/merge_input_reader.cc @@ -165,30 +165,16 @@ namespace pyosmium { void init_merge_input_reader(py::module &m) { - py::class_(m, "MergeInputReader", - "Collects data from multiple input files, sorts and optionally " - "deduplicates the data before applying it to a handler.") + py::class_(m, "MergeInputReader") .def(py::init<>()) .def("_apply_internal", &MergeInputReader::apply_internal, 
py::arg("simplify")=true) .def("apply_to_reader", &MergeInputReader::apply_to_reader, - py::arg("reader"), py::arg("writer"), py::arg("with_history")=false, - "Apply the collected data to data from the given `reader` and write " - "the result to `writer`. This function can be used to merge the diff " - "data together with other OSM data (for example when updating a " - "planet file. If `with_history` is true, then the collected data will " - "be applied verbatim without removing duplicates. This is important " - "when using OSM history files as input.") + py::arg("reader"), py::arg("writer"), py::arg("with_history")=false) .def("add_file", &MergeInputReader::add_file, - py::arg("file"), - "Add data from a file to the internal cache. The file type will be " - "determined from the file extension.") + py::arg("file")) .def("add_buffer", &MergeInputReader::add_buffer, - py::arg("buffer"), py::arg("format"), - "Add data from a byte buffer. The format of the input data must " - "be given in the `format` argument as a string. 
The data will be " - "copied into internal buffers, so that the input buffer can be " - "safely discarded after the function has been called.") + py::arg("buffer"), py::arg("format")) ; }; diff --git a/lib/node_location_handler.cc b/lib/node_location_handler.cc index 90fa1123..86776875 100644 --- a/lib/node_location_handler.cc +++ b/lib/node_location_handler.cc @@ -67,9 +67,7 @@ void init_node_location_handler(py::module &m) .def("ignore_errors", &NodeLocationsForWays::ignore_errors) .def_property("apply_nodes_to_ways", &NodeLocationsForWays::get_apply_nodes_to_ways, - &NodeLocationsForWays::set_apply_nodes_to_ways, - "When set to false, locations are only collected " - "and not automatically applied to way nodes.") + &NodeLocationsForWays::set_apply_nodes_to_ways) ; } diff --git a/lib/osmium.cc b/lib/osmium.cc index 4200241c..b057665b 100644 --- a/lib/osmium.cc +++ b/lib/osmium.cc @@ -82,48 +82,38 @@ PYBIND11_MODULE(_osmium, m) { }); m.def("apply", &pyosmium::apply, - py::arg("reader"), py::arg("handler"), - "Apply a single handler."); + py::arg("reader"), py::arg("handler")); m.def("apply", [](osmium::io::Reader &rd, py::args args) { pyosmium::HandlerChain handler{args}; pyosmium::apply(rd, handler); }, - py::arg("reader"), - "Apply a chain of handlers."); + py::arg("reader")); m.def("apply", [](std::string fn, pyosmium::BaseHandler &h) { osmium::io::Reader rd{fn}; pyosmium::apply(rd, h); }, - py::arg("filename"), py::arg("handler"), - "Apply a single handler."); + py::arg("filename"), py::arg("handler")); m.def("apply", [](std::string fn, py::args args) { pyosmium::HandlerChain handler{args}; osmium::io::Reader rd{fn}; pyosmium::apply(rd, handler); }, - py::arg("filename"), - "Apply a chain of handlers."); + py::arg("filename")); - py::class_(m, "BaseHandler", - "Base class for all handlers in pyosmium. 
Any class inheriting " - "from this class can be used in functions that require a " - "handler-like object."); + py::class_(m, "BaseHandler"); py::class_(m, "BaseFilter") .def("enable_for", &pyosmium::BaseFilter::enable_for, - py::arg("entities"), - "Set the OSM types this filter should be used for.") + py::arg("entities")) ; - py::class_(m, "BufferIterator", - "Iterator interface for reading from a queue of buffers.") + py::class_(m, "BufferIterator") .def(py::init()) .def("__bool__", [](pyosmium::BufferIterator const &it) { return !it.empty(); }) .def("__iter__", [](py::object const &self) { return self; }) - .def("__next__", &pyosmium::BufferIterator::next, - "Get the next OSM object from the buffer or raise an StopIteration.") + .def("__next__", &pyosmium::BufferIterator::next) ; pyosmium::init_merge_input_reader(m); diff --git a/lib/simple_writer.cc b/lib/simple_writer.cc index 3f2c20cf..cac9aec4 100644 --- a/lib/simple_writer.cc +++ b/lib/simple_writer.cc @@ -338,36 +338,15 @@ namespace pyosmium { void init_simple_writer(pybind11::module &m) { - py::class_(m, "SimpleWriter", - "The most generic class to write osmium objects into a file. The writer " - "takes a file name as its mandatory parameter. The file must not yet " - "exist. The file type to output is determined from the file extension. " - "The second (optional) parameter is the buffer size. osmium caches the " - "output data in an internal memory buffer before writing it on disk. This " - "parameter allows changing the default buffer size of 4MB. Larger buffers " - "are normally better but you should be aware that there are normally multiple " - "buffers in use during the write process.\n\n" - "The writer will not overwrite existing files by default. 
Set `overwrite` " - "to True to allow overwriting.\n\n" - "The SimpleWriter can also functions as a handler and will write out " - "all node, ways and relations, it receives.") + py::class_(m, "SimpleWriter") .def(py::init(), py::arg("filename"), py::arg("bufsz") = 4096*1024, py::arg("header") = nullptr, py::arg("overwrite") = false, py::arg("filetype") = "") - .def("add_node", &SimpleWriter::add_node, py::arg("node"), - "Add a new node to the file. The node may be an ``osmium.osm.Node`` object, " - "an ``osmium.osm.mutable.Node`` object or any other Python object that " - "implements the same attributes.") - .def("add_way", &SimpleWriter::add_way, py::arg("way"), - "Add a new way to the file. The way may be an ``osmium.osm.Way`` object, " - "an ``osmium.osm.mutable.Way`` object or any other Python object that " - "implements the same attributes.") - .def("add_relation", &SimpleWriter::add_relation, py::arg("relation"), - "Add a new relation to the file. The relation may be an " - "``osmium.osm.Relation`` object, an ``osmium.osm.mutable.Relation`` " - "object or any other Python object that implements the same attributes.") + .def("add_node", &SimpleWriter::add_node, py::arg("node")) + .def("add_way", &SimpleWriter::add_way, py::arg("way")) + .def("add_relation", &SimpleWriter::add_relation, py::arg("relation")) .def("add", [](SimpleWriter &self, py::object const &o) { if (py::isinstance(o) || py::hasattr(o, "location")) { self.add_node(o); @@ -378,14 +357,8 @@ void init_simple_writer(pybind11::module &m) } else { throw py::type_error("Need node, way or relation object."); } - }, - "Add a new object to the file. The function will try to determine " - "the kind of object automatically.") - .def("close", &SimpleWriter::close, - "Flush the remaining buffers and close the writer. 
While it is not " - "strictly necessary to call this function explicitly, it is still " - "strongly recommended to close the writer as soon as possible, so " - "that the buffer memory can be freed.") + }) + .def("close", &SimpleWriter::close) .def("__enter__", [](py::object const &self) { return self; }) .def("__exit__", [](SimpleWriter &self, py::args args) { self.close(); }) ; diff --git a/src/osmium/_osmium.pyi b/src/osmium/_osmium.pyi index 56feac09..db418872 100644 --- a/src/osmium/_osmium.pyi +++ b/src/osmium/_osmium.pyi @@ -17,57 +17,205 @@ StrPath = Union[str, 'os.PathLike[str]'] # Placeholder for more narrow type defintion to come HandlerLike = object -class InvalidLocationError(Exception): ... +class InvalidLocationError(Exception): + """ Raised when the location of a node is requested that has + no valid location. To be valid, a location must be + inside the -180 to 180 and -90 to 90 degree range. + """ -class BaseHandler: ... +class BaseHandler: + """ Base class for all native handler functions in pyosmium. + Any class that derives from this class can be used for + parameters that need a handler-like object. + """ class BaseFilter(BaseHandler): - def enable_for(self, entities: osm_entity_bits) -> None: ... + """ Base class for all native filter functions in pyosmium. + A filter is a handler that returns a boolean in the handler + functions indicating if the object should pass the filter (False) + or be dropped (True). + """ + def enable_for(self, entities: osm_entity_bits) -> None: + """ Set the OSM types this filter should be applied to. If + an object has a type for which the filter is not enabled, + the filter will be skipped completely. Or to put it in + different words: every object for which the filter is not + enabled, passes the filter automatically. + """ class BufferIterator: - def __init__(self, *handlers: HandlerLike) -> None: ... - def __bool__(self) -> bool: ... - def __iter__(self) -> 'BufferIterator': ... 
- def __next__(self) -> OSMEntity: ... + """ (internal) Iterator interface for reading from a queue of buffers. + + This class is needed for pyosmium's internal implementation. There is + currently no way to create buffers or add them to the iterator + from Python. + """ + def __init__(self, *handlers: HandlerLike) -> None: + """ Create a new iterator. The iterator will pass each + object through the filter chain _handlers_ before returning + it. + """ + def __bool__(self) -> bool: + """ True if there are any objects left to return. + """ + def __iter__(self) -> 'BufferIterator': + """ Returns itself. + """ + def __next__(self) -> OSMEntity: + """ Get the next OSM object from the buffer or raise a StopIteration. + """ class MergeInputReader: - def __init__(self) -> None: ... - def _apply_internal(self, *handlers: HandlerLike, simplify: bool = ...) -> None: ... - def add_buffer(self, buffer: Union[ByteString, str], format: str) -> int: ... - def add_file(self, file: str) -> int: ... - def apply_to_reader(self, reader: Reader, writer: Writer, with_history: bool = ...) -> None: ... - def apply(self, *handlers: Any, idx: str = '', simplify: bool = True) -> None: ... - + """ Buffer which collects data from multiple input files, sorts it + and optionally deduplicates the data before applying to a handler. + """ + def __init__(self) -> None: + """ Initialize a new reader. + """ + def _apply_internal(self, *handlers: HandlerLike, simplify: bool = ...) -> None: + """ Internal application function. Do not use. + """ + def add_buffer(self, buffer: Union[ByteString, str], format: str) -> int: + """ Add input data from a buffer to the reader. The buffer may + be any data which follows the Python buffer protocol. The + mandatory _format_ parameter describes the format of the data. + + The data will be copied into internal buffers, so that the input + buffer can be safely discarded after the function has been called. 
+ """ + def add_file(self, file: str) -> int: + """ Add data from the given input file _file_ to the reader. + """ + def apply_to_reader(self, reader: Reader, writer: Writer, with_history: bool = ...) -> None: + """ Apply the collected data to data from the given _reader_ and write + the result to _writer_. This function can be used to merge the diff + data together with other OSM data (for example when updating a + planet file). If _with_history_ is true, then the collected data will + be applied verbatim without removing duplicates. This is important + when using OSM history files as input. + """ + def apply(self, *handlers: HandlerLike, idx: str = '', simplify: bool = True) -> None: + """ Apply collected data to a handler. The data will be sorted first. + If _simplify_ is true (default) then duplicates will be eliminated + and only the newest version of each object kept. If _idx_ is given + a node location cache with the given type will be created and + applied when creating the ways. Note that a diff file normally does + not contain all node locations to reconstruct changed ways. If the + full way geometries are needed, create a persistent node location + cache during initial import of the area and reuse it when processing + diffs. After the data + has been applied the buffer of the MergeInputReader is empty and + new data can be added for the next round of application. + """ class SimpleWriter: + """ Basic writer for OSM data. The SimpleWriter can write out + objects that are explicitly passed or function as a handler and + write out all objects it receives. It is also possible to + mix these two modes of operations. + + The writer writes out the objects in the order it receives them. + It is the responsibility of the caller to ensure to follow the + [ordering conventions](../user_manual/01-First-Steps.ipynb#the-order-of-osm-files) + for OSM files. + + The SimpleWriter should normally be used as a context manager. 
If you + don't use it in a `with` context, don't forget to call `close()`, + when writing is finished. + """ def __init__(self, filename: str, bufsz: int= ..., header: Optional[Header]= ..., overwrite: bool= ..., - filetype: str= ...) -> None: ... - def add_node(self, node: object) -> None: ... - def add_relation(self, relation: object) -> None: ... - def add_way(self, way: object) -> None: ... - def add(self, obj: object) -> None: ... - def close(self) -> None: ... + filetype: str= ...) -> None: + """ Initiate a new writer for the file _filename_. The writer will + refuse to overwrite an already existing file unless _overwrite_ + is explicitly set to `True`. The file type is usually determined + from the file extension. It can also be set explicitly with the + _filetype_ parameter. + + The optional parameter _bufsz_ sets the size of the buffers used + for collecting the data before they are written out. The default + size is 4MB. Larger buffers are normally better but you should + be aware that there are normally multiple buffers in use during + the write process. + """ + def add_node(self, node: object) -> None: + """ Add a new node to the file. The node may be a + [Node](Dataclasses.md#osmium.osm.Node) object or its mutable + variant or any other Python object that implements the same + attributes. + """ + def add_relation(self, relation: object) -> None: + """ Add a new relation to the file. The relation may be a + [Relation](Dataclasses.md#osmium.osm.Relation) object or its mutable + variant or any other Python object that implements the same + attributes. + """ + def add_way(self, way: object) -> None: + """ Add a new way to the file. The way may be a + [Way](Dataclasses.md#osmium.osm.Way) object or its mutable + variant or any other Python object that implements the same + attributes. + """ + def add(self, obj: object) -> None: + """ Add a new object to the file. The function will try to determine + the kind of object automatically. 
+ """ + def close(self) -> None: + """ Flush the remaining buffers and close the writer. While it is not + strictly necessary to call this function explicitly, it is still + strongly recommended to close the writer as soon as possible, so + that the buffer memory can be freed. + """ def __enter__(self) -> 'SimpleWriter':... def __exit__(self, *args: Any) -> None:... class NodeLocationsForWays: - apply_nodes_to_ways: bool - def __init__(self, locations: LocationTable) -> None: ... - def ignore_errors(self) -> None: ... + """ Handler for retrieving and caching locations from nodes + and adding them to ways. + """ + @property + def apply_nodes_to_ways(self) -> bool: + """ When set (the default), the collected locations + are propagated to the node list of ways. + """ + @apply_nodes_to_ways.setter + def apply_nodes_to_ways(self, value: bool) -> None:... + + def __init__(self, locations: LocationTable) -> None: + """ Initiate a new handler using the given location table _locations_ + to cache the node coordinates. + """ + def ignore_errors(self) -> None: + """ Disable raising an exception when filling the node list of + a way and a coordinate is not available. + """ class OsmFileIterator: - def __init__(self, reader: Reader, *handlers: HandlerLike) -> None: ... - def set_filtered_handler(self, handler: object) -> None: ... - def __iter__(self) -> 'OsmFileIterator': ... - def __next__(self) -> OSMEntity: ... + """ Low-level iterator interface for reading from an OSM source. + """ + def __init__(self, reader: Reader, *handlers: HandlerLike) -> None: + """ Initialise a new iterator using the given _reader_ as source. + Each object is passed through the list of filters given by + _handlers_. If all the filters are passed, the object is + returned by `next()`. + """ + def set_filtered_handler(self, handler: object) -> None: + """ Set a fallback handler for objects that have been filtered + out. The objects will be passed to the single handler. 
+ """ + def __iter__(self) -> 'OsmFileIterator': + """ Returns itself. + """ + def __next__(self) -> OSMEntity: + """ Get the next OSM object from the file or raise a StopIteration. + """ class IdTrackerIdFilter(BaseFilter): ... @@ -91,4 +239,10 @@ class IdTracker: def way_ids(self) -> IdSet: ... def relation_ids(self) -> IdSet: ... -def apply(reader: Union[Reader | str], *handlers: HandlerLike) -> None: ... +def apply(reader: Union[Reader | str], *handlers: HandlerLike) -> None: + """ Apply a chain of handlers to the given input source. The input + source may be given either as a [Reader](IO.md#osmium.io.Reader) or + as a simple file name. If one of the handler is a + [filter](osmium.BaseFilter), then processing of the object will + be stopped if it does not pass the filter. + """ diff --git a/src/osmium/file_processor.py b/src/osmium/file_processor.py index 953795c8..f1b238bc 100644 --- a/src/osmium/file_processor.py +++ b/src/osmium/file_processor.py @@ -12,15 +12,27 @@ from osmium.osm.types import OSMEntity class FileProcessor: - """ A generator that emits OSM objects read from a file. + """ A processor that reads an OSM file in a streaming fashion, + optionally pre-filters the data, enhances it with geometry information, + returning the data via an iterator. """ - def __init__(self, filename: Union[osmium.io.File, osmium.io.FileBuffer, str, Path], + def __init__(self, indata: Union[osmium.io.File, osmium.io.FileBuffer, str, Path], entities: osmium.osm.osm_entity_bits=osmium.osm.ALL) -> None: - if isinstance(filename, (osmium.io.File, osmium.io.FileBuffer)): - self._file = filename - elif isinstance(filename, (str, Path)): - self._file = osmium.io.File(str(filename)) + """ Initialise a new file processor for the given input source _indata_. + This may either be a filename, an instance of [File](IO.md#osmium.io.File) + or buffered data in form of a [FileBuffer](IO.md#osmium.io.FileBuffer). 
+ + The types of objects which will be read from the file can be + restricted with the _entities_ parameter. The data will be skipped + directly at the source file and will never be passed to any filters + including the location and area processors. You usually should not + be restricting objects, when using those. + """ + if isinstance(indata, (osmium.io.File, osmium.io.FileBuffer)): + self._file = indata + elif isinstance(indata, (str, Path)): + self._file = osmium.io.File(str(indata)) else: raise TypeError("File must be an osmium.io.File, osmium.io.FileBuffer, str or Path") self._entities = entities @@ -32,20 +44,35 @@ def __init__(self, filename: Union[osmium.io.File, osmium.io.FileBuffer, str, Pa @property def header(self) -> osmium.io.Header: - """ Return the header information for the file to be read. + """ (read-only) [Header](IO.md#osmium.io.Header) information + for the file to be read. """ return osmium.io.Reader(self._file, osmium.osm.NOTHING).header() @property def node_location_storage(self) -> Optional[LocationTable]: - """ Return the node location cache, if enabled. + """ Node location cache currently in use, if enabled. This can be used to manually look up locations of nodes. + Be aware that the nodes must have been read before you + can do a lookup via the location storage. """ return self._node_store def with_locations(self, storage: str='flex_mem') -> 'FileProcessor': - """ Enable caching of node locations. This is necessary in order - to get geometries for ways and relations. + """ Enable caching of node locations. The file processor will keep + the coordinates of all nodes that are read from the file in + memory and automatically enhance the node list of ways with + the coordinates from the cache. This information can then be + used to create geometries for ways. The node location cache can + also be directly queried through the [node_location_storage]() property. 
+ + The _storage_ parameter can be used to change the type of cache + used to store the coordinates. The default 'flex_mem' is good for + small to medium-sized files. For large files you may need to + switch to a disk-storage based implementation because the cache + can become quite large. See the section on + [location storage in the user manual](../user_manual/03-Working-with-Geometries.ipynb#location-storage) + for more information. """ if not (self._entities & osmium.osm.NODE): raise RuntimeError('Nodes not read from file. Cannot enable location cache.') @@ -66,16 +93,14 @@ def with_areas(self, *filters: 'osmium._osmium.HandlerLike') -> 'FileProcessor': Optionally one or more filters can be passed. These filters will be applied in the first pass, when relation candidates for areas are selected. - Calling this function multiple times causes more filters to be added to the filter chain. - Automatically enables location caching, if it was not yet set. - It uses the default location cache type. To use a different - cache type, you need to call with_locations() explicity. - - Area processing requires that the file is read twice. This - happens transparently. + Calling this function automatically enables location caching + if it was not enabled yet using the default storage type. + To use a different storage type, call `with_locations()` explicitly + with the appropriate _storage_ parameter before calling this + function. """ if self._area_handler is None: self._area_handler = osmium.area.AreaManager() @@ -85,23 +110,43 @@ def with_areas(self, *filters: 'osmium._osmium.HandlerLike') -> 'FileProcessor': return self def with_filter(self, filt: 'osmium._osmium.HandlerLike') -> 'FileProcessor': - """ Add a filter function that is called before an object is - returned in the iterator. Filters are applied sequentially - in the order they were added. + """ Add a filter function to the processor's filter chain. 
+ Filters are called for each processed object in the order they + have been installed. Only when the object passes all the + filter functions will it be handed to the iterator. + + Note that any handler-like object can be installed as a filter. + A non-filtering handler simply works like an all-pass filter. """ self._filters.append(filt) return self def handler_for_filtered(self, handler: 'osmium._osmium.HandlerLike') -> 'FileProcessor': - """ Set a handler to be called on all objects that have been - filtered out and are not presented to the iterator loop. + """ Set a fallback handler for objects that have been filtered out. + + Any object that does not pass the filter chain installed with + `with_filter()` will be passed to this handler. This can be useful + when the entire contents of a file should be passed to a writer + and only some of the objects need to be processed specially + in the iterator body. """ self._filtered_handler = handler return self def __iter__(self) -> Iterator[OSMEntity]: - """ Return the iterator over the file. + """ Create a new iterator for the file processor. It is possible to + create multiple iterators from the same processor and even run + them in parallel. However, you must not change the properties + of the file processor while an iterator is in progress of reading + a file. + + When area processing is enabled, then the input data needs to + be read twice. The first pass reads the relations, while the + second pass reads the whole file. The iterator will do this + transparently for the user. However, be aware that the first + pass of reading may take a while for large files, so that the + iterator might block before the first object is returned. 
""" handlers: List['osmium._osmium.HandlerLike'] = [] diff --git a/src/osmium/helper.py b/src/osmium/helper.py index 36f29a4f..22ec6b20 100644 --- a/src/osmium/helper.py +++ b/src/osmium/helper.py @@ -60,18 +60,6 @@ def __init__(self, filename: str, bufsz: int=4096*1024, filetype: str="") -> Non def _merge_apply(self: MergeInputReader, *handlers: 'HandlerLike', idx: str = '', simplify: bool = True) -> None: - """ Apply collected data to a handler. The data will be sorted first. - If `simplify` is true (default) then duplicates will be eliminated - and only the newest version of each object kept. If `idx` is given - a node location cache with the given type will be created and - applied when creating the ways. Note that a diff file normally does - not contain all node locations to reconstruct changed ways. If the - full way geometries are needed, create a persistent node location - cache during initial import of the area and reuse it when processing - diffs. After the data - has been applied the buffer of the MergeInputReader is empty and - new data can be added for the next round of application. - """ if idx: lh = NodeLocationsForWays(create_map(idx)) lh.ignore_errors()