Skip to content

Commit

Permalink
Always use html2text.py for FAQ, improve output
Browse files Browse the repository at this point in the history
A recent commit broke compilation with Python 3. The original author of
html2text.py is deceased and the fork has increased the number of files
for this "simple" helper.

The html2text.py script in this patch was rewritten and its output
matches that of lynx (except for a few newlines around lists). This means
that indentation has been added for headings, paragraphs and lists.
Also, since it was written from scratch, a new license could be chosen
that matches Wireshark.

Now that the in-tree html2text.py script provides nicer output, remove
detection of the alternative programs (elinks, links). lynx/w3m are
somehow still necessary for asciidoc, though.

(I also looked into reusing html2text.py for the release notes to
replace asciidoc, but the --format=html mode produces different output
(the HTML adds a ToC and section numbers). For now, still require lynx
for the release notes.)

Tested with Python 2.6.6, 2.7.9, 3.2.6 and 3.4.3 under LC_ALL=C and
LC_ALL=en_US.UTF-8 on Linux. Tested reading from stdin and file, writing
to file, pipe and tty. Tested with cmake (Ninja) and autotools on Arch
Linux x86_64. Test:

    # For each $PATH per python version, execute (with varying LC_ALL)
    help/faq.py -b | tools/html2text.py /dev/stdin | md5sum
    help/faq.py -b | tools/html2text.py | md5sum
    help/faq.py -b | tools/html2text.py
    help/faq.py -b | tools/html2text.py >/dev/null

Change-Id: I6409450a3e6c8b010ca082251f9db7358b0cc2fd
Reviewed-on: https://code.wireshark.org/review/7779
Petri-Dish: Peter Wu <[email protected]>
Tested-by: Petri Dish Buildbot <[email protected]>
Reviewed-by: Anders Broman <[email protected]>
  • Loading branch information
Lekensteyn authored and AndersBroman committed Mar 25, 2015
1 parent 83b6338 commit 68698db
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 569 deletions.
5 changes: 2 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1416,10 +1416,9 @@ else()
endforeach()
endif(WIN32)
add_custom_command(TARGET copy_data_files PRE_BUILD
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/help/faq.py > faq.tmp.html
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/help/faq.py -b > faq.tmp.html
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/html2text.py
--width=72 --no-links faq.tmp.html
> "${DATAFILE_DIR}/help/faq.txt"
faq.tmp.html > "${DATAFILE_DIR}/help/faq.txt"
COMMAND ${CMAKE_COMMAND} -E remove faq.tmp.html
)

Expand Down
1 change: 0 additions & 1 deletion COPYING
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ covered by other licenses that are not themselves directly compatible with the
GPLv2. This is OK, as only the tools themselves are licensed this way, the
output of the tools is not considered a derived work, and so can be safely
licensed for Wireshark's use. An incomplete selection of these tools includes:
- the html2text utility (tools/html2text.py) is licensed under the GPLv3.
- the pidl utility (tools/pidl) is licensed under the GPLv3+.

Parts of Wireshark can be built and distributed as libraries. These
Expand Down
20 changes: 1 addition & 19 deletions cmake/modules/FindLYNX.cmake
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
#
# - Find unix commands from cygwin
# This module looks for some usual Unix commands.
# This module looks for lynx (used by asciidoc)
#

INCLUDE(FindCygwin)

FIND_PROGRAM(LYNX_EXECUTABLE
NAMES
lynx
elinks
links
true
PATHS
${CYGWIN_INSTALL_PATH}/bin
/bin
Expand All @@ -23,18 +20,3 @@ INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LYNX DEFAULT_MSG LYNX_EXECUTABLE)

MARK_AS_ADVANCED(LYNX_EXECUTABLE)

# Convert html to text
IF(LYNX_EXECUTABLE MATCHES lynx)
# (See Bug # 1446 for note re 'force-html' below)
set(HTML2TXT "lynx -dump -width=72 -nolist -stdin -force-html")
ELSEIF(LYNX_EXECUTABLE MATCHES elinks)
set(HTML2TXT "elinks -dump -dump-width 72")
ELSEIF(LYNX_EXECUTABLE MATCHES links)
set(HTML2TXT "links -dump -width 72")
ELSEIF(LYNX_EXECUTABLE MATCHES true)
set(HTML2TXT "true")
ELSE()
message(ERROR "Should never be reached - please report!")
ENDIF()
message(STATUS "html2text: ${HTML2TXT}")
14 changes: 1 addition & 13 deletions config.nmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ PROGRAM_FILES=$(PROGRAMFILES)
PROGRAM_FILES_W6432=$(PROGRAMW6432)

#
# Location of the "tools" directory. This affects HTML2TXT below and should
# be overridden by makefiles in any subdirectories that use HTML2TXT.
# Location of the "tools" directory. This affects the path to textify.ps1
!IFNDEF TOOLS_DIR
TOOLS_DIR=tools
!ENDIF
Expand Down Expand Up @@ -1321,17 +1320,6 @@ FOP=$(WIRESHARK_LIB_DIR)\fop-1.0\fop.bat
# Additional options to fop.
FOP_OPTS=-Xmx256m

# html to text converter for text version of release notes, e.g. elinks.
# This could also be "lynx", or "true" if neither elinks nor lynx is installed
# (cygwin: lynx works, elinks not available, links and true doesn't produce output)
#HTML2TXT=elinks -dump -dump-width 72
##HTML2TXT=links -dump -width 72 ## XXX: Fails: For links -dump requires 'url' (filename) arg.
#HTML2TXT=lynx -dump -width=72 -nolist -stdin

!IFNDEF HTML2TXT
HTML2TXT=$(PYTHON) $(TOOLS_DIR)\html2text.py --width=72 --no-links
!ENDIF

# the XSL processor (part of cygwin's libxslt package)
XSLTPROC="xsltproc"

Expand Down
12 changes: 4 additions & 8 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -760,23 +760,19 @@ AC_PATH_PROG(A2X, a2x)
AC_CHECK_PROG(HAVE_A2X, a2x, "yes", "no")
AM_CONDITIONAL(HAVE_A2X, test x$HAVE_A2X = xyes)

# Want to control a tape drive? Use mt. Want to convert HTML to text?
# Uhhhhh... elinks? lynx? w3m? pandoc? html2text?
AC_PATH_PROG(ELINKS, elinks)
AC_CHECK_PROG(HAVE_ELINKS, elinks, "yes", "no")
AM_CONDITIONAL(HAVE_ELINKS, test x$HAVE_ELINKS = xyes)

# Check for fop (translate .fo to e.g. pdf)
AC_PATH_PROG(FOP, fop)
AC_CHECK_PROG(HAVE_FOP, fop, "yes", "no")
AM_CONDITIONAL(HAVE_FOP, test x$HAVE_FOP = xyes)

# Check for lynx (html -> text)
# TODO: HAVE_LYNX and HAVE_W3M are unused. Maybe require one of them
# to be found when a2x is enabled? Otherwise it will fail later...
# Check for lynx (asciidoc text format from html)
AC_PATH_PROG(LYNX, lynx)
AC_CHECK_PROG(HAVE_LYNX, lynx, "yes", "no")
AM_CONDITIONAL(HAVE_LYNX, test x$HAVE_LYNX = xyes)

# Check for w3m (html -> text)
# Check for w3m (asciidoc text format from html)
AC_PATH_PROG(W3M, w3m)
AC_CHECK_PROG(HAVE_W3M, w3m, "yes", "no")
AM_CONDITIONAL(HAVE_W3M, test x$HAVE_W3M = xyes)
Expand Down
20 changes: 0 additions & 20 deletions docbook/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,6 @@ A2X_TEXT_OPTS=
A2X_TEXT_OPTS+="--lynx"
#endif

# html to text converter for text version of release notes, e.g. elinks.
# This could also be "lynx", or "true" if neither elinks nor lynx is installed
# (See Bug # 1446 for note re 'force-html' below)
# Sorry about the indenting, but that's what automake requires...
if HAVE_ELINKS
HTML2TXT=$(ELINKS) -dump -dump-width 72
## links: -dump requires 'url' argument (as opposed to elinks & lynx)
## (Rather than fixing things we'll just disable the use of links).
##else
##if HAVE_LINKS
##HTML2TXT=$(LINKS) -dump -width 72
else
if HAVE_LYNX
HTML2TXT=$(LYNX) -dump -width=72 -nolist -stdin -force-html
else
HTML2TXT="true"
endif
##endif
endif

############### YOU SHOULDN'T HAVE TO EDIT ANYTHING BELOW THIS LINE! ################

include Makefile.common
Expand Down
11 changes: 3 additions & 8 deletions help/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,8 @@ CLEANFILES = faq.txt
MAINTAINERCLEANFILES = \
Makefile.in

# Try our best to convert the FAQ to text.
# The output of html2text.py isn't as pretty as elinks, links, or lynx. If that ever changes, we
# can use it exclusively.
# Convert the FAQ to text.
faq.txt: $(srcdir)/faq.py
$(AM_V_GEN)$(srcdir)/faq.py >$@.tmp && \
command -v elinks > /dev/null && elinks -dump -dump-width 72 -no-numbering -no-references < $@.tmp > $@ || \
command -v links > /dev/null && links -width 72 -html-numbered-links 0 -dump $@.tmp > $@ || \
command -v lynx > /dev/null && lynx -dump -width=72 -nolist -stdin -force-html < $@.tmp > $@ || \
$(srcdir)/../tools/html2text.py --width=72 --no-links $@.tmp > $@ && \
$(AM_V_GEN)$(srcdir)/faq.py -b >$@.tmp && \
$(srcdir)/../tools/html2text.py $@.tmp > $@ && \
rm -f $@.tmp
2 changes: 1 addition & 1 deletion help/Makefile.nmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ include ..\config.nmake
all: faq.txt

faq.txt: faq.py
$(PYTHON) faq.py | $(HTML2TXT) > $@
$(PYTHON) faq.py -b | $(PYTHON) $(TOOLS_DIR)\html2text.py > $@

clean:
rm -rf faq.txt
Expand Down
3 changes: 0 additions & 3 deletions tools/checklicenses.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,6 @@ def PrintUsage():
'tools/pidl': [
'UNKNOWN',
],
'tools/html2text.py': [
'UNKNOWN',
],
'tools/lemon': [
'UNKNOWN',
],
Expand Down
Loading

0 comments on commit 68698db

Please sign in to comment.