From df429ca54806bef5bc5f99a30bd7f8ab29413500 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Sun, 18 Aug 2024 22:51:07 +0200
Subject: [PATCH] move docs for _osmium module to pyi files

This gives us better control over the output and is easier to
maintain.
---
 lib/file_iterator.cc         |   6 +-
 lib/merge_input_reader.cc    |  22 +---
 lib/node_location_handler.cc |   4 +-
 lib/osmium.cc                |  26 ++---
 lib/simple_writer.cc         |  39 +------
 src/osmium/_osmium.pyi       | 210 ++++++++++++++++++++++++++++++-----
 src/osmium/file_processor.py |  91 +++++++++++----
 src/osmium/helper.py         |  12 --
 8 files changed, 271 insertions(+), 139 deletions(-)
diff --git a/lib/file_iterator.cc b/lib/file_iterator.cc
index 03ff4d1e..045de77f 100644
--- a/lib/file_iterator.cc
+++ b/lib/file_iterator.cc
@@ -139,8 +139,7 @@ namespace pyosmium {
 
 void init_osm_file_iterator(py::module &m)
 {
-    py::class_<OsmFileIterator>(m, "OsmFileIterator",
-        "Iterator interface for reading an OSM file.")
+    py::class_<OsmFileIterator>(m, "OsmFileIterator")
         .def(py::init<osmium::io::Reader *, py::args>(),
              py::keep_alive<0, 1>())
         .def("set_filtered_handler", &OsmFileIterator::set_filtered_handler,
@@ -148,8 +147,7 @@ void init_osm_file_iterator(py::module &m)
         .def("set_filtered_handler", &OsmFileIterator::set_filtered_python_handler,
              py::keep_alive<0, 1>())
         .def("__iter__", [](py::object const &self) { return self; })
-        .def("__next__", &OsmFileIterator::next,
-             "Get the next OSM object from the file or raise a StopIteration.")
+        .def("__next__", &OsmFileIterator::next)
         ;
 }
 
diff --git a/lib/merge_input_reader.cc b/lib/merge_input_reader.cc
index 7ed30617..52cd7220 100644
--- a/lib/merge_input_reader.cc
+++ b/lib/merge_input_reader.cc
@@ -165,30 +165,16 @@ namespace pyosmium {
 
 void init_merge_input_reader(py::module &m)
 {
-    py::class_<MergeInputReader>(m, "MergeInputReader",
-        "Collects data from multiple input files, sorts and optionally "
-        "deduplicates the data before applying it to a handler.")
+    py::class_<MergeInputReader>(m, "MergeInputReader")
         .def(py::init<>())
         .def("_apply_internal", &MergeInputReader::apply_internal,
              py::arg("simplify")=true)
         .def("apply_to_reader", &MergeInputReader::apply_to_reader,
-             py::arg("reader"), py::arg("writer"), py::arg("with_history")=false,
-             "Apply the collected data to data from the given `reader` and write "
-             "the result to `writer`. This function can be used to merge the diff "
-             "data together with other OSM data (for example when updating a "
-             "planet file. If `with_history` is true, then the collected data will "
-             "be applied verbatim without removing duplicates. This is important "
-             "when using OSM history files as input.")
+             py::arg("reader"), py::arg("writer"), py::arg("with_history")=false)
         .def("add_file", &MergeInputReader::add_file,
-             py::arg("file"),
-             "Add data from a file to the internal cache. The file type will be "
-             "determined from the file extension.")
+             py::arg("file"))
         .def("add_buffer", &MergeInputReader::add_buffer,
-             py::arg("buffer"), py::arg("format"),
-             "Add data from a byte buffer. The format of the input data must "
-             "be given in the `format` argument as a string. The data will be "
-             "copied into internal buffers, so that the input buffer can be "
-             "safely discarded after the function has been called.")
+             py::arg("buffer"), py::arg("format"))
     ;
 };
 
diff --git a/lib/node_location_handler.cc b/lib/node_location_handler.cc
index 90fa1123..86776875 100644
--- a/lib/node_location_handler.cc
+++ b/lib/node_location_handler.cc
@@ -67,9 +67,7 @@ void init_node_location_handler(py::module &m)
         .def("ignore_errors", &NodeLocationsForWays::ignore_errors)
         .def_property("apply_nodes_to_ways",
                      &NodeLocationsForWays::get_apply_nodes_to_ways,
-                     &NodeLocationsForWays::set_apply_nodes_to_ways,
-                     "When set to false, locations are only collected "
-                     "and not automatically applied to way nodes.")
+                     &NodeLocationsForWays::set_apply_nodes_to_ways)
     ;
 
 }
diff --git a/lib/osmium.cc b/lib/osmium.cc
index 4200241c..b057665b 100644
--- a/lib/osmium.cc
+++ b/lib/osmium.cc
@@ -82,48 +82,38 @@ PYBIND11_MODULE(_osmium, m) {
     });
 
     m.def("apply", &pyosmium::apply,
-          py::arg("reader"), py::arg("handler"),
-          "Apply a single handler.");
+          py::arg("reader"), py::arg("handler"));
     m.def("apply", [](osmium::io::Reader &rd, py::args args)
                      {
                          pyosmium::HandlerChain handler{args};
                          pyosmium::apply(rd, handler);
                      },
-          py::arg("reader"),
-          "Apply a chain of handlers.");
+          py::arg("reader"));
     m.def("apply", [](std::string fn, pyosmium::BaseHandler &h)
                    {
                        osmium::io::Reader rd{fn};
                        pyosmium::apply(rd, h);
                    },
-          py::arg("filename"), py::arg("handler"),
-          "Apply a single handler.");
+          py::arg("filename"), py::arg("handler"));
     m.def("apply", [](std::string fn, py::args args)
                      {
                          pyosmium::HandlerChain handler{args};
                          osmium::io::Reader rd{fn};
                          pyosmium::apply(rd, handler);
                      },
-          py::arg("filename"),
-          "Apply a chain of handlers.");
+          py::arg("filename"));
 
-    py::class_<pyosmium::BaseHandler>(m, "BaseHandler",
-             "Base class for all handlers in pyosmium. Any class inheriting "
-             "from this class can be used in functions that require a "
-             "handler-like object.");
+    py::class_<pyosmium::BaseHandler>(m, "BaseHandler");
     py::class_<pyosmium::BaseFilter, pyosmium::BaseHandler>(m, "BaseFilter")
         .def("enable_for", &pyosmium::BaseFilter::enable_for,
-             py::arg("entities"),
-             "Set the OSM types this filter should be used for.")
+             py::arg("entities"))
     ;
 
-    py::class_<pyosmium::BufferIterator>(m, "BufferIterator",
-    "Iterator interface for reading from a queue of buffers.")
+    py::class_<pyosmium::BufferIterator>(m, "BufferIterator")
     .def(py::init<py::args>())
     .def("__bool__", [](pyosmium::BufferIterator const &it) { return !it.empty(); })
     .def("__iter__", [](py::object const &self) { return self; })
-    .def("__next__", &pyosmium::BufferIterator::next,
-         "Get the next OSM object from the buffer or raise an StopIteration.")
+    .def("__next__", &pyosmium::BufferIterator::next)
     ;
 
     pyosmium::init_merge_input_reader(m);
diff --git a/lib/simple_writer.cc b/lib/simple_writer.cc
index 3f2c20cf..cac9aec4 100644
--- a/lib/simple_writer.cc
+++ b/lib/simple_writer.cc
@@ -338,36 +338,15 @@ namespace pyosmium {
 
 void init_simple_writer(pybind11::module &m)
 {
-    py::class_<SimpleWriter, BaseHandler>(m, "SimpleWriter",
-        "The most generic class to write osmium objects into a file. The writer "
-        "takes a file name as its mandatory parameter. The file must not yet "
-        "exist. The file type to output is determined from the file extension. "
-        "The second (optional) parameter is the buffer size. osmium caches the "
-        "output data in an internal memory buffer before writing it on disk. This "
-        "parameter allows changing the default buffer size of 4MB. Larger buffers "
-        "are normally better but you should be aware that there are normally multiple "
-        "buffers in use during the write process.\n\n"
-        "The writer will not overwrite existing files by default. Set `overwrite` "
-        "to True to allow overwriting.\n\n"
-        "The SimpleWriter can also functions as a handler and will write out "
-        "all node, ways and relations, it receives.")
+    py::class_<SimpleWriter, BaseHandler>(m, "SimpleWriter")
         .def(py::init<const char*, unsigned long, osmium::io::Header const *, bool, const std::string&>(),
              py::arg("filename"), py::arg("bufsz") = 4096*1024,
              py::arg("header") = nullptr,
              py::arg("overwrite") = false,
              py::arg("filetype") = "")
-        .def("add_node", &SimpleWriter::add_node, py::arg("node"),
-             "Add a new node to the file. The node may be an ``osmium.osm.Node`` object, "
-             "an ``osmium.osm.mutable.Node`` object or any other Python object that "
-             "implements the same attributes.")
-        .def("add_way", &SimpleWriter::add_way, py::arg("way"),
-             "Add a new way to the file. The way may be an ``osmium.osm.Way`` object, "
-             "an ``osmium.osm.mutable.Way`` object or any other Python object that "
-             "implements the same attributes.")
-        .def("add_relation", &SimpleWriter::add_relation, py::arg("relation"),
-             "Add a new relation to the file. The relation may be an "
-             "``osmium.osm.Relation`` object, an ``osmium.osm.mutable.Relation`` "
-             "object or any other Python object that implements the same attributes.")
+        .def("add_node", &SimpleWriter::add_node, py::arg("node"))
+        .def("add_way", &SimpleWriter::add_way, py::arg("way"))
+        .def("add_relation", &SimpleWriter::add_relation, py::arg("relation"))
         .def("add", [](SimpleWriter &self, py::object const &o) {
                            if (py::isinstance<pyosmium::COSMNode>(o) || py::hasattr(o, "location")) {
                                self.add_node(o);
@@ -378,14 +357,8 @@ void init_simple_writer(pybind11::module &m)
                            } else {
                                throw py::type_error("Need node, way or relation object.");
                            }
-                    },
-             "Add a new object to the file. The function will try to determine "
-             "the kind of object automatically.")
-        .def("close", &SimpleWriter::close,
-             "Flush the remaining buffers and close the writer. While it is not "
-             "strictly necessary to call this function explicitly, it is still "
-             "strongly recommended to close the writer as soon as possible, so "
-             "that the buffer memory can be freed.")
+                    })
+        .def("close", &SimpleWriter::close)
         .def("__enter__", [](py::object const &self) { return self; })
         .def("__exit__", [](SimpleWriter &self, py::args args) { self.close(); })
     ;
diff --git a/src/osmium/_osmium.pyi b/src/osmium/_osmium.pyi
index 56feac09..db418872 100644
--- a/src/osmium/_osmium.pyi
+++ b/src/osmium/_osmium.pyi
@@ -17,57 +17,205 @@ StrPath = Union[str, 'os.PathLike[str]']
 # Placeholder for more narrow type defintion to come
 HandlerLike = object
 
-class InvalidLocationError(Exception): ...
+class InvalidLocationError(Exception):
+    """ Raised when the location of a node is requested that has
+        no valid location. To be valid, a location must be
+        inside the -180 to 180 and -90 to 90 degree range.
+    """
 
 
-class BaseHandler: ...
+class BaseHandler:
+    """ Base class for all native handler functions in pyosmium.
+        Any class that derives from this class can be used for
+        parameters that need a handler-like object.
+    """
 
 
 class BaseFilter(BaseHandler):
-    def enable_for(self, entities: osm_entity_bits) -> None: ...
+    """ Base class for all native filter functions in pyosmium.
+        A filter is a handler that returns a boolean in the handler
+        functions indicating if the object should pass the filter (False)
+        or be dropped (True).
+    """
+    def enable_for(self, entities: osm_entity_bits) -> None:
+        """ Set the OSM types this filter should be applied to. If
+            an object has a type for which the filter is not enabled,
+            the filter will be skipped completely. Or to put it in
+            different words: every object for which the filter is not
+            enabled, passes the filter automatically.
+        """
 
 
 class BufferIterator:
-    def __init__(self, *handlers: HandlerLike) -> None: ...
-    def __bool__(self) -> bool: ...
-    def __iter__(self) -> 'BufferIterator': ...
-    def __next__(self) -> OSMEntity: ...
+    """ (internal) Iterator interface for reading from a queue of buffers.
+
+        This class is needed for pyosmium's internal implementation. There is
+        currently no way to create buffers or add them to the iterator
+        from Python.
+    """
+    def __init__(self, *handlers: HandlerLike) -> None:
+        """ Create a new iterator. The iterator will pass each
+            object through the filter chain _handlers_ before returning
+            it.
+        """
+    def __bool__(self) -> bool:
+        """ True if there are any objects left to return.
+        """
+    def __iter__(self) -> 'BufferIterator':
+        """ Returns itself.
+        """
+    def __next__(self) -> OSMEntity:
+        """ Get the next OSM object from the buffer or raise a StopIteration.
+        """
 
 
 class MergeInputReader:
-    def __init__(self) -> None: ...
-    def _apply_internal(self, *handlers: HandlerLike, simplify: bool = ...) -> None: ...
-    def add_buffer(self, buffer: Union[ByteString, str], format: str) -> int: ...
-    def add_file(self, file: str) -> int: ...
-    def apply_to_reader(self, reader: Reader, writer: Writer, with_history: bool = ...) -> None: ...
-    def apply(self, *handlers: Any, idx: str = '', simplify: bool = True) -> None: ...
-
+    """ Buffer which collects data from multiple input files, sorts it
+        and optionally deduplicates the data before applying to a handler.
+    """
+    def __init__(self) -> None:
+        """ Initialize a new reader.
+        """
+    def _apply_internal(self, *handlers: HandlerLike, simplify: bool = ...) -> None:
+        """ Internal application function. Do not use.
+        """
+    def add_buffer(self, buffer: Union[ByteString, str], format: str) -> int:
+        """ Add input data from a buffer to the reader. The buffer may
+            be any data which follows the Python buffer protocol. The
+            mandatory _format_ parameter describes the format of the data.
+
+            The data will be copied into internal buffers, so that the input
+            buffer can be safely discarded after the function has been called.
+        """
+    def add_file(self, file: str) -> int:
+        """ Add data from the given input file _file_ to the reader.
+        """
+    def apply_to_reader(self, reader: Reader, writer: Writer, with_history: bool = ...) -> None:
+        """ Apply the collected data to data from the given _reader_ and write
+            the result to _writer_. This function can be used to merge the diff
+            data together with other OSM data (for example when updating a
+            planet file). If _with_history_ is true, then the collected data will
+            be applied verbatim without removing duplicates. This is important
+            when using OSM history files as input.
+        """
+    def apply(self, *handlers: HandlerLike, idx: str = '', simplify: bool = True) -> None:
+        """ Apply collected data to a handler. The data will be sorted first.
+            If _simplify_ is true (default) then duplicates will be eliminated
+            and only the newest version of each object kept. If _idx_ is given
+            a node location cache with the given type will be created and
+            applied when creating the ways. Note that a diff file normally does
+            not contain all node locations to reconstruct changed ways. If the
+            full way geometries are needed, create a persistent node location
+            cache during initial import of the area and reuse it when processing
+            diffs. After the data
+            has been applied the buffer of the MergeInputReader is empty and
+            new data can be added for the next round of application.
+        """
 
 
 class SimpleWriter:
+    """ Basic writer for OSM data. The SimpleWriter can write out
+        objects that are explicitly passed or function as a handler and
+        write out all objects it receives. It is also possible to
+        mix these two modes of operations.
+
+        The writer writes out the objects in the order it receives them.
+        It is the responsibility of the caller to ensure to follow the
+        [ordering conventions](../user_manual/01-First-Steps.ipynb#the-order-of-osm-files)
+        for OSM files.
+
+        The SimpleWriter should normally be used as a context manager. If you
+        don't use it in a `with` context, don't forget to call `close()`,
+        when writing is finished.
+    """
     def __init__(self, filename: str, bufsz: int= ...,
                  header: Optional[Header]= ..., overwrite: bool= ...,
-                 filetype: str= ...) -> None: ...
-    def add_node(self, node: object) -> None: ...
-    def add_relation(self, relation: object) -> None: ...
-    def add_way(self, way: object) -> None: ...
-    def add(self, obj: object) -> None: ...
-    def close(self) -> None: ...
+                 filetype: str= ...) -> None:
+        """ Initiate a new writer for the file _filename_. The writer will
+            refuse to overwrite an already existing file unless _overwrite_
+            is explicitly set to `True`. The file type is usually determined
+            from the file extension. It can also be set explicitly with the
+            _filetype_ parameter.
+
+            The optional parameter _bufsz_ sets the size of the buffers used
+            for collecting the data before they are written out. The default
+            size is 4MB. Larger buffers are normally better but you should
+            be aware that there are normally multiple buffers in use during
+            the write process.
+        """
+    def add_node(self, node: object) -> None:
+        """ Add a new node to the file. The node may be a
+            [Node](Dataclasses.md#osmium.osm.Node) object or its mutable
+            variant or any other Python object that implements the same
+            attributes.
+        """
+    def add_relation(self, relation: object) -> None:
+        """ Add a new relation to the file. The relation may be a
+            [Relation](Dataclasses.md#osmium.osm.Relation) object or its mutable
+            variant or any other Python object that implements the same
+            attributes.
+        """
+    def add_way(self, way: object) -> None:
+        """ Add a new way to the file. The way may be a
+            [Way](Dataclasses.md#osmium.osm.Way) object or its mutable
+            variant or any other Python object that implements the same
+            attributes.
+        """
+    def add(self, obj: object) -> None:
+        """ Add a new object to the file. The function will try to determine
+            the kind of object automatically.
+        """
+    def close(self) -> None:
+        """ Flush the remaining buffers and close the writer. While it is not
+            strictly necessary to call this function explicitly, it is still
+            strongly recommended to close the writer as soon as possible, so
+            that the buffer memory can be freed.
+        """
     def __enter__(self) -> 'SimpleWriter':...
     def __exit__(self, *args: Any) -> None:...
 
 
 class NodeLocationsForWays:
-    apply_nodes_to_ways: bool
-    def __init__(self, locations: LocationTable) -> None: ...
-    def ignore_errors(self) -> None: ...
+    """ Handler for retrieving and caching locations from nodes
+        and adding them to ways.
+    """
+    @property
+    def apply_nodes_to_ways(self) -> bool:
+        """ When set (the default), the collected locations
+            are propagated to the node list of ways.
+        """
+    @apply_nodes_to_ways.setter
+    def apply_nodes_to_ways(self, value: bool) -> None:...
+
+    def __init__(self, locations: LocationTable) -> None:
+        """ Initiate a new handler using the given location table _locations_
+            to cache the node coordinates.
+        """
+    def ignore_errors(self) -> None:
+        """ Disable raising an exception when filling the node list of
+            a way and a coordinate is not available.
+        """
 
 
 class OsmFileIterator:
-    def __init__(self, reader: Reader, *handlers: HandlerLike) -> None: ...
-    def set_filtered_handler(self, handler: object) -> None: ...
-    def __iter__(self) -> 'OsmFileIterator': ...
-    def __next__(self) -> OSMEntity: ...
+    """ Low-level iterator interface for reading from an OSM source.
+    """
+    def __init__(self, reader: Reader, *handlers: HandlerLike) -> None:
+        """ Initialise a new iterator using the given _reader_ as source.
+            Each object is passed through the list of filters given by
+            _handlers_. If all the filters are passed, the object is
+            returned by `next()`.
+        """
+    def set_filtered_handler(self, handler: object) -> None:
+        """ Set a fallback handler for objects that have been filtered
+            out. The objects will be passed to the single handler.
+        """
+    def __iter__(self) -> 'OsmFileIterator':
+        """ Returns itself.
+        """
+    def __next__(self) -> OSMEntity:
+        """ Get the next OSM object from the file or raise a StopIteration.
+        """
 
 
 class IdTrackerIdFilter(BaseFilter): ...
@@ -91,4 +239,10 @@ class IdTracker:
     def way_ids(self) -> IdSet: ...
     def relation_ids(self) -> IdSet: ...
 
-def apply(reader: Union[Reader | str], *handlers: HandlerLike) -> None: ...
+def apply(reader: Union[Reader | str], *handlers: HandlerLike) -> None:
+    """ Apply a chain of handlers to the given input source. The input
+        source may be given either as a [Reader](IO.md#osmium.io.Reader) or
+        as a simple file name. If one of the handlers is a
+        [filter](osmium.BaseFilter), then processing of the object will
+        be stopped if it does not pass the filter.
+    """
diff --git a/src/osmium/file_processor.py b/src/osmium/file_processor.py
index 953795c8..f1b238bc 100644
--- a/src/osmium/file_processor.py
+++ b/src/osmium/file_processor.py
@@ -12,15 +12,27 @@
 from osmium.osm.types import OSMEntity
 
 class FileProcessor:
-    """ A generator that emits OSM objects read from a file.
+    """ A processor that reads an OSM file in a streaming fashion,
+        optionally pre-filters the data, enhances it with geometry information,
+        returning the data via an iterator.
     """
 
-    def __init__(self, filename: Union[osmium.io.File, osmium.io.FileBuffer, str, Path],
+    def __init__(self, indata: Union[osmium.io.File, osmium.io.FileBuffer, str, Path],
                  entities: osmium.osm.osm_entity_bits=osmium.osm.ALL) -> None:
-        if isinstance(filename, (osmium.io.File, osmium.io.FileBuffer)):
-            self._file = filename
-        elif isinstance(filename, (str, Path)):
-            self._file = osmium.io.File(str(filename))
+        """ Initialise a new file processor for the given input source _indata_.
+            This may either be a filename, an instance of [File](IO.md#osmium.io.File)
+            or buffered data in form of a [FileBuffer](IO.md#osmium.io.FileBuffer).
+
+            The types of objects which will be read from the file can be
+            restricted with the _entities_ parameter. The data will be skipped
+            directly at the source file and will never be passed to any filters
+            including the location and area processors. You usually should not
+            be restricting objects, when using those.
+            """
+        if isinstance(indata, (osmium.io.File, osmium.io.FileBuffer)):
+            self._file = indata
+        elif isinstance(indata, (str, Path)):
+            self._file = osmium.io.File(str(indata))
         else:
             raise TypeError("File must be an osmium.io.File, osmium.io.FileBuffer, str or Path")
         self._entities = entities
@@ -32,20 +44,35 @@ def __init__(self, filename: Union[osmium.io.File, osmium.io.FileBuffer, str, Pa
 
     @property
     def header(self) -> osmium.io.Header:
-        """ Return the header information for the file to be read.
+        """ (read-only) [Header](IO.md#osmium.io.Header) information
+            for the file to be read.
         """
         return osmium.io.Reader(self._file, osmium.osm.NOTHING).header()
 
     @property
     def node_location_storage(self) -> Optional[LocationTable]:
-        """ Return the node location cache, if enabled.
+        """ Node location cache currently in use, if enabled.
             This can be used to manually look up locations of nodes.
+            Be aware that the nodes must have been read before you
+            can do a lookup via the location storage.
         """
         return self._node_store
 
     def with_locations(self, storage: str='flex_mem') -> 'FileProcessor':
-        """ Enable caching of node locations. This is necessary in order
-            to get geometries for ways and relations.
+        """ Enable caching of node locations. The file processor will keep
+            the coordinates of all nodes that are read from the file in
+            memory and automatically enhance the node list of ways with
+            the coordinates from the cache. This information can then be
+            used to create geometries for ways. The node location cache can
+            also be directly queried through the [node_location_storage]() property.
+
+            The _storage_ parameter can be used to change the type of cache
+            used to store the coordinates. The default 'flex_mem' is good for
+            small to medium-sized files. For large files you may need to
+            switch to a disk-storage based implementation because the cache
+            can become quite large. See the section on
+            [location storage in the user manual](../user_manual/03-Working-with-Geometries.ipynb#location-storage)
+            for more information.
         """
         if not (self._entities & osmium.osm.NODE):
             raise RuntimeError('Nodes not read from file. Cannot enable location cache.')
@@ -66,16 +93,14 @@ def with_areas(self, *filters: 'osmium._osmium.HandlerLike') -> 'FileProcessor':
             Optionally one or more filters can be passed. These filters
             will be applied in the first pass, when relation candidates
             for areas are selected.
-
             Calling this function multiple times causes more filters to
             be added to the filter chain.
 
-            Automatically enables location caching, if it was not yet set.
-            It uses the default location cache type. To use a different
-            cache type, you need to call with_locations() explicity.
-
-            Area processing requires that the file is read twice. This
-            happens transparently.
+            Calling this function automatically enables location caching
+            with the default storage type, if it was not enabled yet.
+            To use a different storage type, call `with_locations()` explicitly
+            with the appropriate _storage_ parameter before calling this
+            function.
         """
         if self._area_handler is None:
             self._area_handler = osmium.area.AreaManager()
@@ -85,23 +110,43 @@ def with_areas(self, *filters: 'osmium._osmium.HandlerLike') -> 'FileProcessor':
         return self
 
     def with_filter(self, filt: 'osmium._osmium.HandlerLike') -> 'FileProcessor':
-        """ Add a filter function that is called before an object is
-            returned in the iterator. Filters are applied sequentially
-            in the order they were added.
+        """ Add a filter function to the processor's filter chain.
+            Filters are called for each processed object in the order they
+            have been installed. Only when the object passes all the
+            filter functions will it be handed to the iterator.
+
+            Note that any handler-like object can be installed as a filter.
+            A non-filtering handler simply works like an all-pass filter.
         """
         self._filters.append(filt)
         return self
 
 
     def handler_for_filtered(self, handler: 'osmium._osmium.HandlerLike') -> 'FileProcessor':
-        """ Set a handler to be called on all objects that have been
-            filtered out and are not presented to the iterator loop.
+        """ Set a fallback handler for objects that have been filtered out.
+
+            Any object that does not pass the filter chain installed with
+            `with_filter()` will be passed to this handler. This can be useful
+            when the entire contents of a file should be passed to a writer
+            and only some of the objects need to be processed specially
+            in the iterator body.
         """
         self._filtered_handler = handler
         return self
 
     def __iter__(self) -> Iterator[OSMEntity]:
-        """ Return the iterator over the file.
+        """ Create a new iterator for the file processor. It is possible to
+            create multiple iterators from the same processor and even run
+            them in parallel. However, you must not change the properties
+            of the file processor while an iterator is in the process of reading
+            a file.
+
+            When area processing is enabled, then the input data needs to
+            be read twice. The first pass reads the relations, while the
+            second pass reads the whole file. The iterator will do this
+            transparently for the user. However, be aware that the first
+            pass of reading may take a while for large files, so that the
+            iterator might block before the first object is returned.
         """
         handlers: List['osmium._osmium.HandlerLike'] = []
 
diff --git a/src/osmium/helper.py b/src/osmium/helper.py
index 36f29a4f..22ec6b20 100644
--- a/src/osmium/helper.py
+++ b/src/osmium/helper.py
@@ -60,18 +60,6 @@ def __init__(self, filename: str, bufsz: int=4096*1024, filetype: str="") -> Non
 
 
 def _merge_apply(self: MergeInputReader, *handlers: 'HandlerLike', idx: str = '', simplify: bool = True) -> None:
-    """ Apply collected data to a handler. The data will be sorted first.
-        If `simplify` is true (default) then duplicates will be eliminated
-        and only the newest version of each object kept. If `idx` is given
-        a node location cache with the given type will be created and
-        applied when creating the ways. Note that a diff file normally does
-        not contain all node locations to reconstruct changed ways. If the
-        full way geometries are needed, create a persistent node location
-        cache during initial import of the area and reuse it when processing
-        diffs. After the data
-        has been applied the buffer of the MergeInputReader is empty and
-        new data can be added for the next round of application.
-    """
     if idx:
         lh = NodeLocationsForWays(create_map(idx))
         lh.ignore_errors()