-
Notifications
You must be signed in to change notification settings - Fork 4
/
pylit.py
executable file
·1935 lines (1654 loc) · 62.8 KB
/
pylit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# pylit.py
# ********
# Literate programming with reStructuredText
# ++++++++++++++++++++++++++++++++++++++++++
#
# :Date: $Date$
# :Revision: $Revision$
# :URL: $URL$
# :Copyright: © 2005, 2007 Günter Milde.
# Released without warranty under the terms of the
# GNU General Public License (v. 2 or later)
#
# ::
"""pylit: bidirectional text <-> code converter
Convert between a *text source* with embedded computer code and a *code source*
with embedded documentation.
"""
# .. contents::
#
# Frontmatter
# ===========
#
# Changelog
# ---------
#
# .. class:: borderless
#
# ====== ========== ===========================================================
# 0.1 2005-06-29 Initial version.
# 0.1.1 2005-06-30 First literate version.
# 0.1.2 2005-07-01 Object orientated script using generators.
# 0.1.3 2005-07-10 Two state machine (later added 'header' state).
# 0.2b 2006-12-04 Start of work on version 0.2 (code restructuring).
# 0.2 2007-01-23 Published at http://pylit.berlios.de.
# 0.2.1 2007-01-25 Outsourced non-core documentation to the PyLit pages.
# 0.2.2 2007-01-26 New behaviour of `diff` function.
# 0.2.3 2007-01-29 New `header` methods after suggestion by Riccardo Murri.
# 0.2.4 2007-01-31 Raise Error if code indent is too small.
# 0.2.5 2007-02-05 New command line option --comment-string.
# 0.2.6 2007-02-09 Add section with open questions,
# Code2Text: let only blank lines (no comment str)
# separate text and code,
# fix `Code2Text.header`.
# 0.2.7 2007-02-19 Simplify `Code2Text.header`,
# new `iter_strip` method replacing a lot of ``if``-s.
# 0.2.8 2007-02-22 Set `mtime` of outfile to the one of infile.
# 0.3 2007-02-27 New `Code2Text` converter after an idea by Riccardo Murri,
# explicit `option_defaults` dict for easier customisation.
# 0.3.1 2007-03-02 Expand hard-tabs to prevent errors in indentation,
# `Text2Code` now also works on blocks,
# removed dependency on SimpleStates module.
# 0.3.2 2007-03-06 Bug fix: do not set `language` in `option_defaults`
# renamed `code_languages` to `languages`.
# 0.3.3 2007-03-16 New language css,
# option_defaults -> defaults = optparse.Values(),
# simpler PylitOptions: don't store parsed values,
# don't parse at initialisation,
# OptionValues: return `None` for non-existing attributes,
# removed -infile and -outfile, use positional arguments.
# 0.3.4 2007-03-19 Documentation update,
# separate `execute` function.
# 2007-03-21 Code cleanup in `Text2Code.__iter__`.
# 0.3.5 2007-03-23 Removed "css" from known languages after learning that
# there is no C++ style "// " comment string in CSS2.
# 0.3.6 2007-04-24 Documentation update.
# 0.4 2007-05-18 Implement Converter.__iter__ as stack of iterator
# generators. Iterating over a converter instance now
# yields lines instead of blocks.
# Provide "hooks" for pre- and postprocessing filters.
# Rename states to reduce confusion with formats:
# "text" -> "documentation", "code" -> "code_block".
# 0.4.1 2007-05-22 Converter.__iter__: cleanup and reorganisation,
# rename parent class Converter -> TextCodeConverter.
# 0.4.2 2007-05-23 Merged Text2Code.converter and Code2Text.converter into
# TextCodeConverter.converter.
# 0.4.3 2007-05-30 Replaced use of defaults.code_extensions with
# values.languages.keys().
# Removed spurious `print` statement in code_block_handler.
# Added basic support for 'c' and 'css' languages
# with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
# 0.5 2007-06-06 Moved `collect_blocks`_ out of `TextCodeConverter`_,
# bug fix: collect all trailing blank lines into a block.
# Expand tabs with `expandtabs_filter`_.
# 0.6 2007-06-20 Configurable code-block marker (default ``::``)
# 0.6.1 2007-06-28 Bug fix: reset self.code_block_marker_missing.
# 0.7 2007-12-12 prepending an empty string to sys.path in run_doctest()
# to allow imports from the current working dir.
# 0.7.1 2008-01-07 If outfile does not exist, do a round-trip conversion
# and report differences (as with outfile=='-').
# 0.7.2 2008-01-28 Do not add missing code-block separators with
# `doctest_run` on the code source. Keeps lines consistent.
# 0.7.3 2008-04-07 Use value of code_block_marker for insertion of missing
# transition marker in Code2Text.code_block_handler
# Add "shell" to defaults.languages
# 0.7.4 2008-06-23 Add "latex" to defaults.languages
# 0.7.5 2009-05-14 Bugfix: ignore blank lines in test for end of code block
# 0.7.6 2009-12-15 language-dependent code-block markers (after a
# `feature request and patch by jrioux`_),
# use DefaultDict for language-dependent defaults,
# new defaults setting `add_missing_marker`_.
# 0.7.7 2010-06-23 New command line option --codeindent.
# 0.7.8 2011-03-30 bugfix: do not overwrite custom `add_missing_marker` value,
# allow directive options following the 'code' directive.
# 0.7.9 2011-04-05 Decode doctest string if 'magic comment' gives encoding.
# 3.1 2013-09-16 Change to Python3: print statement and exec statements
# removed. String formatting % changed to .format().
# Replaced raise statements and except statements.
# Upgrade for ``dict`` method changes.
# Cleanup ``DefaultDict`` to be more easily replaced.
# Adjust trailing space handling to match unit tests better.
# Provide ``with`` statements for all file contexts.
# 3.1.1 2019-04-05 Add tox version checking. Change distribution slightly to
# fit better with PyPA standards
# ====== ========== ===========================================================
#
# To Do List
# ----------
#
# * Replace home-brewed DefaultDict with collections.defaultdict.
#
# * Replace optparse with argparse.
#
# `Documentation/library/argparse.html#upgrading-optparse-code <file:///Library/Frameworks/Python.framework/Versions/3.2/Resources/English.lproj/Documentation/library/argparse.html#upgrading-optparse-code>`_
#
# ::
_version = "3.1.1"
__docformat__ = 'restructuredtext'
# Introduction
# ------------
#
# PyLit is a bidirectional converter between two formats of a computer
# program source:
#
# * a (reStructured) text document with program code embedded in
# *code blocks*, and
# * a compilable (or executable) code source with *documentation*
# embedded in comment blocks
#
#
# Requirements
# ------------
#
# ::
import os, sys
import re
import contextlib
import warnings
#from collections import defaultdict # TODO
import optparse
#import argparse # TODO
# DefaultDict
# ~~~~~~~~~~~
# As `collections.defaultdict` is only introduced in Python 2.5, we
# define a Python3 compatible version of the dictionary with default from
# http://code.activestate.com/recipes/389639/
#
# ::
class DefaultDict(dict):
    """Minimalistic dictionary with a default value for missing keys.

    default_factory -- callable without arguments; its return value is
                       handed out for every key that is not stored.
    *args, **kwargs -- initial content, interpreted like the arguments
                       of `dict`.

    Unlike `collections.defaultdict`, looking up a missing key does
    *not* insert the default into the dictionary.
    """
    def __init__(self, default_factory=lambda:None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.default_factory = default_factory

    def __getitem__(self, key):
        """Return the stored value or, for a missing key, a fresh default."""
        # Call the factory only when the key is really missing, so that a
        # costly (or side-effecting) factory is not run on every lookup.
        try:
            return super().__getitem__(key)
        except KeyError:
            return self.default_factory()
# TODO: Replace with proper Python3 ``collections.defaultdict``
# Defaults
# ========
#
# The `defaults` object provides a central repository for default
# values and their customisation. ::
defaults = optparse.Values()
# It is used for
#
# * the initialisation of data arguments in TextCodeConverter_ and
# PylitOptions_
#
# * completion of command line options in `PylitOptions.complete_values`_.
#
# This allows the easy creation of back-ends that customise the
# defaults and then call `main`_ e.g.:
#
# >>> import pylit
# >>> pylit.defaults.comment_string = "## "
# >>> pylit.defaults.codeindent = 4
# >>> pylit.main()
#
# The following default values are defined in pylit.py:
#
# languages
# ---------
#
# Mapping of code file extensions to code language::
defaults.languages = DefaultDict(lambda:"python", # fallback language
{".c": "c",
".cc": "c++",
".css": "css",
".py": "python",
".sh": "shell",
".sl": "slang",
".sty": "latex",
".tex": "latex"
})
# Will be overridden by the ``--language`` command line option.
#
# The first argument is the fallback language, used if there is no
# matching extension (e.g. if pylit is used as filter) and no
# ``--language`` is specified. It can be changed programmatically by
# assignment to the ``.default_factory`` attribute (a callable returning
# the fallback value), e.g.
#
# >>> defaults.languages.default_factory = lambda: 'c++'
#
#
# .. _text_extension:
#
# text_extensions
# ---------------
#
# List of known extensions of (reStructured) text files. The first
# extension in this list is used by the `_get_outfile_name`_ method to
# generate a text output filename::
defaults.text_extensions = [".txt", ".rst"]
# comment_strings
# ---------------
#
# Comment strings for known languages. Used in Code2Text_ to recognise
# text blocks and in Text2Code_ to format text blocks as comments.
# Defaults to ``'# '``.
#
# **Comment strings include trailing whitespace.** ::
defaults.comment_strings = DefaultDict(lambda:'# ',
{"css": '// ',
"c": '// ',
"c++": '// ',
"latex": '% ',
"python": '# ',
"shell": '# ',
"slang": '% '
})
# header_string
# -------------
#
# Marker string for a header code block in the text source. No trailing
# whitespace needed as indented code follows.
# Must be a valid rst directive that accepts code on the same line, e.g.
# ``'..admonition::'``.
#
# Default is a comment marker::
defaults.header_string = '..'
# .. _code_block_marker:
#
# code_block_markers
# ------------------
#
# Markup at the end of a documentation block.
# Default is Docutils' marker for a `literal block`_::
defaults.code_block_markers = DefaultDict(lambda:'::')
# The `code_block_marker` string is `inserted into a regular expression`_.
# Language-specific markers can be defined programmatically, e.g. in a
# wrapper script.
#
# In a document where code examples are only one of several uses of
# literal blocks, it is more appropriate to single out the source code,
# e.g. with the double colon at a separate line ("expanded form")
#
# ``defaults.code_block_markers.default_factory = lambda: ':: *'``
#
# or a dedicated ``.. code-block::`` directive [#]_
#
# ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
#
# The latter form also allows code in different languages kept together
# in one literate source file.
#
# .. [#] The ``.. code-block::`` directive is not (yet) supported by
# standard Docutils. It is provided by several add-ons, including
# the `code-block directive`_ project in the Docutils Sandbox and
# Sphinx_.
#
#
# strip
# -----
#
# Export to the output format stripping documentation or code blocks::
defaults.strip = False
# strip_marker
# ------------
#
# Strip literal marker from the end of documentation blocks when
# converting to code format. Makes the code more concise but loses the
# synchronisation of line numbers in text and code formats. Can also be used
# (together with the auto-completion of the code-text conversion) to change
# the `code_block_marker`::
defaults.strip_marker = False
# add_missing_marker
# ------------------
#
# When converting from code format to text format, add a `code_block_marker`
# at the end of documentation blocks if it is missing::
defaults.add_missing_marker = True
# Keep this at ``True``, if you want to re-convert to code format later!
#
#
# .. _defaults.preprocessors:
#
# preprocessors
# -------------
#
# Preprocess the data with language-specific filters_
# Set below in Filters_::
defaults.preprocessors = {}
# .. _defaults.postprocessors:
#
# postprocessors
# --------------
#
# Postprocess the data with language-specific filters_::
defaults.postprocessors = {}
# .. _defaults.codeindent:
#
# codeindent
# ----------
#
# Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
defaults.codeindent = 2
# In `Text2Code.code_block_handler`_, the codeindent is determined by the
# first recognised code line (header or first indented literal block
# of the text source).
#
# overwrite
# ---------
#
# What to do if the outfile already exists? (ignored if `outfile` == '-')::
defaults.overwrite = 'update'
# Recognised values:
#
# :'yes': overwrite `outfile` if it already exists,
# :'update': fail if the `outfile` is newer than `infile`,
# :'no': fail if `outfile` exists.
#
#
# Extensions
# ==========
#
# Try to import optional extensions::
try:
import pylit_elisp
except ImportError:
pass
# Converter Classes
# =================
#
# The converter classes implement a simple state machine to separate and
# transform documentation and code blocks. For this task, only a very limited
# parsing is needed. PyLit's parser assumes:
#
# * `indented literal blocks`_ in a text source are code blocks.
#
# * comment blocks in a code source where every line starts with a matching
# comment string are documentation blocks.
#
# TextCodeConverter
# -----------------
# ::
class TextCodeConverter:
"""Parent class for the converters `Text2Code` and `Code2Text`.
"""
# The parent class defines data attributes and functions used in both
# `Text2Code`_ converting a text source to executable code source, and
# `Code2Text`_ converting commented code to a text source.
#
# Data attributes
# ~~~~~~~~~~~~~~~
#
# Class default values are fetched from the `defaults`_ object and can be
# overridden by matching keyword arguments during class instantiation. This
# also works with keyword arguments to `get_converter`_ and `main`_, as these
# functions pass on unused keyword args to the instantiation of a converter
# class. ::
language = defaults.languages.default_factory()
comment_strings = defaults.comment_strings
comment_string = "" # set in __init__ (if empty)
codeindent = defaults.codeindent
header_string = defaults.header_string
code_block_markers = defaults.code_block_markers
code_block_marker = "" # set in __init__ (if empty)
strip = defaults.strip
strip_marker = defaults.strip_marker
add_missing_marker = defaults.add_missing_marker
directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
state = "" # type of current block, see `TextCodeConverter.convert`_
# Interface methods
# ~~~~~~~~~~~~~~~~~
#
# .. _TextCodeConverter.__init__:
#
# __init__
# """"""""
#
# Initialising sets the `data` attribute, an iterable object yielding lines of
# the source to convert. [#]_
#
# .. [#] The most common choice of data is a `file` object with the text
# or code source.
#
# To convert a string into a suitable object, use its splitlines method
# like ``"2 lines\nof source".splitlines(True)``.
#
#
# Additional keyword arguments are stored as instance variables,
# overwriting the class defaults::
def __init__(self, data, **keyw):
    """Store the data source and derive markers, filters and regexps.

    data   --  iterable data object
               (list, file, generator, string, ...)
    **keyw --  remaining keyword arguments are
               stored as data-attributes
    """
    self.data = data
    # keyword arguments overwrite the class defaults on this instance
    self.__dict__.update(keyw)

    # If empty, `code_block_marker` and `comment_string` are set according
    # to the `language`.
    if not self.code_block_marker:
        self.code_block_marker = self.code_block_markers[self.language]
    if not self.comment_string:
        self.comment_string = self.comment_strings[self.language]
    self.stripped_comment_string = self.comment_string.rstrip()

    # Pre- and post-processing filters are set (with
    # `TextCodeConverter.get_filter`_).
    self.preprocessor = self.get_filter("preprocessors", self.language)
    self.postprocessor = self.get_filter("postprocessors", self.language)

    # .. _inserted into a regular expression:
    #
    # Finally, a regular expression for the `code_block_marker` is compiled
    # to find valid cases of `code_block_marker` in a given line and return
    # the groups: prefix, code_block_marker, remainder.
    #
    # Raw string literals keep the regexp escapes (``\.`` and ``\n``) out of
    # Python's own string-escape processing; a non-raw ``'\.'`` is an invalid
    # escape sequence and triggers a warning at compile time.
    marker = self.code_block_marker
    if marker == '::':
        # the default marker may occur at the end of a text line
        self.marker_regexp = re.compile(r'^( *(?!\.\.).*)(::)([ \n]*)$')
    else:
        # marker must be on a separate line; it is inserted verbatim, so
        # it may itself contain regular expression syntax
        self.marker_regexp = re.compile(r'^( *)(%s)(.*\n?)$' % marker)
# .. _TextCodeConverter.__iter__:
#
# __iter__
# """"""""
#
# Return an iterator for the instance. Iteration yields lines of converted
# data.
#
# The iterator is a chain of iterators acting on `self.data` that does
#
# * preprocessing
# * text<->code format conversion
# * postprocessing
#
# Pre- and post- processing are only performed if filters for the current
# language are registered in `defaults.preprocessors`_ and|or
# `defaults.postprocessors`_. The filters must accept an iterable as first
# argument and yield the processed input data line-wise.
# ::
def __iter__(self):
    """Return an iterator yielding the converted lines.

    The input `self.data` is passed through three stages:
    the preprocessing filter, the text<->code conversion
    (`convert`) and the postprocessing filter.
    """
    preprocessed = self.preprocessor(self.data)
    converted = self.convert(preprocessed)
    return self.postprocessor(converted)
# .. _TextCodeConverter.__call__:
#
# __call__
# """"""""
# The special `__call__` method allows the use of class instances as callable
# objects. It returns the converted data as list of lines::
def __call__(self):
    """Run the conversion and return the result as a list of lines."""
    return list(self)
# .. _TextCodeConverter.__str__:
#
# __str__
# """""""
# Return converted data as string::
def __str__(self):
    """Return the converted data as one string (all lines concatenated)."""
    converted_lines = self()
    return "".join(converted_lines)
# Helpers and convenience methods
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# .. _TextCodeConverter.convert:
#
# convert
# """""""
#
# The `convert` method generates an iterator that does the actual code <-->
# text format conversion. The converted data is yielded line-wise and the
# instance's `state` attribute indicates whether the current line is "header",
# "documentation", or "code_block"::
def convert(self, lines):
    """Iterate over lines of a program document and convert
    between "text" and "code" format

    This is a generator: the converted data is yielded line by line
    while `self.state` tells the type of the block being processed.
    """
    # Initialise internal data arguments. (Done here, so that every new
    # iteration re-initialises them.)
    #
    # `state`
    #   the "type" of the currently processed block of lines. One of
    #
    #   :"":              initial state: check for header,
    #   :"header":        leading code block: strip `header_string`,
    #   :"documentation": documentation part: comment out,
    #   :"code_block":    literal blocks containing source code: unindent.
    self.state = ""

    # `_codeindent`
    #   * not to be confused with the configurable `codeindent` (without
    #     the leading underscore), which is added to "code_block" lines in
    #     the code-to-text conversion,
    #   * set in `Text2Code.code_block_handler`_ and stripped from all
    #     "code_block" lines in the text-to-code conversion.
    self._codeindent = 0

    # `_textindent`
    #   * set by `Text2Code.documentation_handler`_,
    #   * used in `Text2Code.set_state`_ to find the end of a code block.
    self._textindent = 0

    # `_add_code_block_marker`
    #   set by `Code2Text.documentation_handler`_ when the documentation
    #   block does not end with a code_block_marker_; to be evaluated by
    #   `Code2Text.code_block_handler`_, as only the handler of the next
    #   block knows whether a marker is needed at all.
    self._add_code_block_marker = False

    # Determine the state of each block and convert it with the matching
    # "<state>_handler" method.
    for block in collect_blocks(expandtabs_filter(lines)):
        try:
            self.set_state(block)
        except StopIteration:
            # `set_state` signals "out of data" this way
            return
        handler = getattr(self, self.state + "_handler")
        yield from handler(block)
# .. _TextCodeConverter.get_filter:
#
# get_filter
# """"""""""
# ::
def get_filter(self, filter_set, language):
    """Return language specific filter

    filter_set -- name of the filter collection on `defaults`
                  ("preprocessors" or "postprocessors")
    language   -- language name used to build the lookup key:
                  "text2<language>" for text-to-code converters,
                  "<language>2text" for code-to-text converters.

    Falls back to `identity_filter` if the collection or the key
    does not exist.
    """
    # isinstance() instead of a class identity test: subclasses of the
    # converters shall get the filters of their parent class, too.
    if isinstance(self, Text2Code):
        key = "text2" + language
    elif isinstance(self, Code2Text):
        key = language + "2text"
    else:
        key = ""
    try:
        return getattr(defaults, filter_set)[key]
    except (AttributeError, KeyError):
        # no filter registered for this conversion
        return identity_filter
# get_indent
# """"""""""
# Return the number of leading spaces in `line`::
def get_indent(self, line):
    """Return the indentation of `line`.

    The indentation is the number of leading whitespace characters,
    i.e. everything removed by `str.lstrip()`. For a line consisting
    of whitespace only, this includes the line ending.
    """
    stripped = line.lstrip()
    return len(line) - len(stripped)
# Text2Code
# ---------
#
# The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
# Code blocks are unindented, documentation is commented (or filtered, if the
# ``strip`` option is True).
#
# .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
# literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
# treated as part of the documentation. This allows the inclusion of
# examples:
#
# >>> 23 + 3
# 26
#
# Mark that there is no double colon before the doctest block in the
# text source.
#
# The class inherits the interface and helper functions from
# TextCodeConverter_ and adds functions specific to the text-to-code format
# conversion::
class Text2Code(TextCodeConverter):
"""Convert a (reStructured) text source to code source
"""
# .. _Text2Code.set_state:
#
# set_state
# ~~~~~~~~~
# ::
def set_state(self, block):
    """Determine state of `block`. Set `self.state`.

    The new state depends on the active state (from the last block)
    and on features of the current block. It is one of "header",
    "documentation", or "code_block".

    Raises `StopIteration` if `block` is empty.
    """
    # ``set_state()`` is used inside an iteration. Hence, if we are out of
    # data, a ``StopIteration`` exception is raised.
    if not block:
        raise StopIteration

    # First block (state ""): the `header_string` indicates a leading
    # code block.
    if self.state == "":
        # print( "set state for {0!r}".format(block) )
        if block[0].startswith(self.header_string):
            self.state = "header"
        else:
            self.state = "documentation"
        return

    # In state "documentation", the next block is documentation too. The
    # end of a documentation part is detected in the
    # `Text2Code.documentation_handler`_.
    if self.state not in ("code_block", "header"):
        return

    # A "code_block" ends with the first less indented, non-blank line.
    # `_textindent` is set by the documentation handler to the indent of
    # the preceding documentation block.
    indents = [self.get_indent(line) for line in block if line.rstrip()]
    code_ends = bool(indents) and min(indents) <= self._textindent
    self.state = "documentation" if code_ends else "code_block"
# TODO: (or not to do?) insert blank line before the first line with too-small
# codeindent using self.ensure_trailing_blank_line(lines, line) (would need
# split and push-back of the documentation part)?
#
# .. _Text2Code.header_handler:
#
# header_handler
# ~~~~~~~~~~~~~~
#
# Sometimes code needs to remain on the first line(s) of the document to be
# valid. The most common example is the "shebang" line that tells a POSIX
# shell how to process an executable file::
#!/usr/bin/env python
# In Python, the special comment to indicate the encoding, e.g.
# ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
# or code too.
#
# If we want to keep the line numbers in sync for text and code source, the
# reStructured Text markup for these header lines must start at the same line
# as the first header line. Therefore, header lines could not be marked as
# literal block (this would require the ``::`` and an empty line above the
# code_block).
#
# OTOH, a comment may start at the same line as the comment marker and it
# includes subsequent indented lines. Comments are visible in the reStructured
# Text source but hidden in the pretty-printed output.
#
# With a header converted to comment in the text source, everything before
# the first documentation block (i.e. before the first paragraph using the
# matching comment string) will be hidden away (in HTML or PDF output).
#
# This seems a good compromise, the advantages
#
# * line numbers are kept
# * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
# * greater flexibility: you can hide a repeating header in a project
# consisting of many source files.
#
# set off the disadvantages
#
# - it may come as surprise if a part of the file is not "printed",
# - one more syntax element to learn for rst newbies to start with pylit,
# (however, starting from the code source, this will be auto-generated)
#
# In the case that there is no matching comment at all, the complete code
# source will become a comment -- however, in this case it is not very likely
# the source is a literate document anyway.
#
# If needed for the documentation, it is possible to quote the header in (or
# after) the first documentation block, e.g. as `parsed literal`.
# ::
def header_handler(self, lines):
    """Format leading code block

    The first occurrence of `header_string` is removed from the first
    line (the list `lines` is changed in place), then the block is
    converted like any other code block.
    """
    first_line = lines[0]
    lines[0] = first_line.replace(self.header_string, "", 1)
    # the rest is ordinary code-block formatting
    yield from self.code_block_handler(lines)
# .. _Text2Code.documentation_handler:
#
# documentation_handler
# ~~~~~~~~~~~~~~~~~~~~~
#
# The 'documentation' handler processes everything that is not recognised as
# "code_block". Documentation is quoted with `self.comment_string`
# (or filtered with `--strip=True`).
# If end-of-documentation marker is detected,
#
# * set state to 'code_block'
#
# * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
# next "documentation" block)
#
# ::
def documentation_handler(self, lines):
    """Convert documentation blocks from text to code format

    Yields every line prefixed with the comment string (nothing at
    all if `self.strip` is set). As a side effect, `self.state` is
    set to "code_block" and `self._textindent` to the indentation
    of the marker line when a code-block marker is found.
    """
    for line in lines:
        has_text = bool(line.rstrip())
        # A non-blank line after the code-block marker that is not a
        # directive option: the marker was a false positive.
        if (self.state == "code_block" and has_text
                and not self.directive_option_regexp.search(line)):
            self.state = "documentation"
        # test for end of documentation block
        if self.marker_regexp.search(line):
            self.state = "code_block"
            self._textindent = self.get_indent(line)
        # stripping documentation: track the state only, yield nothing
        if self.strip:
            continue
        if has_text:
            yield self.comment_string + line
        elif self.state == "code_block":
            # do not comment blank lines preceding a code block
            yield line
        else:
            # Avoid a trailing space on a comment-only line.
            yield self.stripped_comment_string + line
# .. _Text2Code.code_block_handler:
#
# code_block_handler
# ~~~~~~~~~~~~~~~~~~
#
# The "code_block" handler is called with an indented literal block. It
# removes leading whitespace up to the indentation of the first code line in
# the file (this deviation from Docutils behaviour allows indented blocks of
# Python code). ::
def code_block_handler(self, block):
    """Convert indented literal blocks to source code format

    Yields the lines of `block` with the code indentation removed.

    Raises `ValueError` (during iteration) if a non-blank line is
    indented less than the code indentation.
    """
    # If still unset, determine the indentation of code blocks from the
    # first non-blank code line. (A blank first line must be skipped: its
    # "indentation" would be the length of the line ending.)
    if self._codeindent == 0:
        for line in block:
            if line.strip():
                self._codeindent = self.get_indent(line)
                break

    # Yield unindented lines after checking whether we can safely unindent.
    # If the line is less indented than `_codeindent`, something went wrong.
    for line in block:
        if line.lstrip() and self.get_indent(line) < self._codeindent:
            raise ValueError("code block contains line less indented " \
                  "than {0:d} spaces \n{1!r}".format(self._codeindent, block))
        # blank lines shorter than the indent pass through unchanged
        yield line.replace(" "*self._codeindent, "", 1)
# Code2Text
# ---------
#
# The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
# a source in "code format" (i.e. in a programming language), extracts
# documentation from comment blocks, and puts program code in literal blocks.
#
# The class inherits the interface and helper functions from
# TextCodeConverter_ and adds functions specific to the code-to-text format
# conversion::
class Code2Text(TextCodeConverter):
"""Convert code source to text source
"""
# set_state
# ~~~~~~~~~
#
# Check if block is "header", "documentation", or "code_block":
#
# A paragraph is "documentation", if every non-blank line starts with a
# matching comment string (including whitespace except for commented blank
# lines) ::
def set_state(self, block):
    """Determine state of `block`.

    A block is "documentation" if every line is commented, blank, or
    a blank comment. Otherwise it is the "header" (if it is the first
    block, i.e. the state is still "") or a "code_block".
    """
    def is_documentation(line):
        # commented, blank or blank comment (comment string without its
        # trailing whitespace)
        return (line.startswith(self.comment_string)
                or not line.rstrip()
                or line.rstrip() == self.comment_string.rstrip())

    if all(is_documentation(line) for line in block):
        # no code line found
        self.state = "documentation"
    elif self.state == "":
        # non-commented line in the very first block
        self.state = "header"
    else:
        self.state = "code_block"
# header_handler
# ~~~~~~~~~~~~~~
#
# Handle a leading code block. (See `Text2Code.header_handler`_ for a
# discussion of the "header" state.) ::
def header_handler(self, lines):
    """Format leading code block

    Yields the lines formatted as code block with `header_string`
    prepended to the first one. Yields nothing if `self.strip` is
    set or if the code-block handler returns no lines.
    """
    # Test for truth (not ``== True``), as the other handlers do.
    if self.strip:
        return
    # get iterator over the lines that formats them as code-block
    code_lines = iter(self.code_block_handler(lines))
    # An exhausted iterator must not leak StopIteration out of this
    # generator (that would be turned into a RuntimeError).
    try:
        first_line = next(code_lines)
    except StopIteration:
        return
    # prepend header string to first line
    yield self.header_string + first_line
    # yield remaining lines
    yield from code_lines
# .. _Code2Text.documentation_handler:
#
# documentation_handler
# ~~~~~~~~~~~~~~~~~~~~~
#
# The *documentation state* handler converts a comment to a documentation
# block by stripping the leading `comment string` from every line::
def documentation_handler(self, block):
    """Uncomment documentation blocks in source code

    Yields the lines of `block` with the comment string removed.
    Depending on the settings, the code-block marker is stripped or
    `self._add_code_block_marker` is set to tell whether the marker
    is missing at the end of the block.
    """
    # Strip comment strings.
    uncommented = [self.uncomment_line(raw) for raw in block]

    if self.strip or self.strip_marker:
        # If the code block is stripped, the literal marker would lead to
        # an error when the text is converted with Docutils. Strip it as
        # well.
        self.strip_code_block_marker(uncommented)
    elif self.add_missing_marker:
        # Otherwise, check for the `code_block_marker`_ at the end of the
        # documentation block: walk backwards, skipping blank lines and
        # directive options that might follow the marker.
        marker_missing = True
        for candidate in reversed(uncommented):
            if self.marker_regexp.search(candidate):
                marker_missing = False
                break
            if (candidate.rstrip()
                    and not self.directive_option_regexp.search(candidate)):
                # ordinary text line reached before any marker
                break
        self._add_code_block_marker = marker_missing

    # Yield lines.
    yield from uncommented
# uncomment_line
# ~~~~~~~~~~~~~~
#
# Return documentation line after stripping comment string. Consider the
# case that a blank line has a comment string without trailing whitespace::
def uncomment_line(self, line):
    """Return uncommented documentation line

    Removes the leading `comment_string`. A blank comment line that
    holds only the comment string without its trailing whitespace
    is uncommented as well. Other lines are returned unchanged.
    """
    prefix = self.comment_string
    if line.startswith(prefix):
        # Strip the prefix only, and stop here: the remainder is text,
        # even if it happens to look like a bare comment marker.
        return line[len(prefix):]
    if line.rstrip() == self.stripped_comment_string:
        # blank line with comment string lacking the trailing whitespace
        return line.replace(self.stripped_comment_string, "", 1)
    return line
# .. _Code2Text.code_block_handler:
#
# code_block_handler