diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 59a403aa3..000000000 --- a/.gitignore +++ /dev/null @@ -1,103 +0,0 @@ -# Mac-specific files -.DS_Store - -# Tool-specific files -.ipynb_checkpoints/ -__pycache__/ - -# ipypublish sources -ipypublish -ipypublish-master - -# Chapter target files -html/*.html -html/*_files/ -beta/html/*.html -beta/html/*_files/ - -code/*.py -code/*.py.out -beta/code/*.py -beta/code/*.py.out - -mypy/*.py - -slides/*.html -slides/*_files/ -beta/slides/*.html -beta/slides/*_files/ - -pdf/*.pdf -pdf/*.tex -pdf/*_files/ - -*.blg - -nbpdf/*.pdf - -word/*.docx -word/*_files/ - -epub/*.epub -epub/*_files/ - -markdown/*.md -markdown/*_files/ - -full_notebooks/*.ipynb -full_notebooks/*_utils/* -full_notebooks/bookutils/* - -rendered/*.ipynb - -.depend/*.makefile - -notebooks/.jupyterlab.pid - -# Derived pics -notebooks/PICS/Sitemap.svg - -# Temp files -book -*book -geckodriver.log -orders.db -FuzzManager -simply-buggy -**/my_project -import*.py -.ipynb_checkpoints - -# Docker-make output directory -build-output/ - -# LS -.virtual_documents -notebooks/notebooks - -# Logs -binder.log -*.py.out -.jupyter.log -.jupyterlab.log - -# Caches -.mypy_cache/ - -# .bib timestamps -.*.bib.* - -# Zeller's files -Security Testing -projects -papers -course -udacity -videos - -*.command -*.webloc -notebooks/CISPACourse.ipynb -notebooks/BugBoard.ipynb -notebooks/Todos.ipynb -notebooks/SimpleGrammarMiner.ipynb diff --git a/.gitignore b/.gitignore new file mode 120000 index 000000000..a35631426 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +notebooks/shared/gitignore \ No newline at end of file diff --git a/Chapters.makefile b/Chapters.makefile index 09cf89e0d..750d2a47d 100644 --- a/Chapters.makefile +++ b/Chapters.makefile @@ -9,6 +9,9 @@ BOOKTITLE = The Fuzzing Book AUTHORS = Andreas Zeller, Rahul Gopinath, Marcel Böhme, Gordon Fraser, and Christian Holler TWITTER = @FuzzingBook +# Where the shared files are +SHARED = notebooks/shared/ + # Chapter(s) to be marked as "new" in menu NEW_CHAPTERS = diff --git a/Makefile b/Makefile deleted file mode 100644 index e0f3ecc3d..000000000 --- a/Makefile +++ /dev/null @@ -1,1285 +0,0 @@ -# Fuzzingbook/Debuggingbook Makefile - -# Get chapter files -CHAPTERS_MAKEFILE = Chapters.makefile -include $(CHAPTERS_MAKEFILE) - -# All source notebooks -SOURCE_FILES = \ - $(FRONTMATTER) \ - $(CHAPTERS) \ - $(APPENDICES) \ - $(EXTRAS) - -# The bibliography file -BIB = fuzzingbook.bib - -# The utilities folder -UTILS = bookutils - -# The utilities in $(UTILS) -UTILITY_FILES = \ - __init__.py \ - PrettyTable.py \ - README.md \ - export_notebook_code.py \ - import_notebooks.py \ - set_fixed_seed.py - -# Where the notebooks are -NOTEBOOKS = notebooks - -# Derived versions including HTML, SVG, and text output cells (for Web) -FULL_NOTEBOOKS = full_notebooks - -# Derived versions including PNG and text output cells, -# but without excursions (for LaTeX and PDF) -RENDERED_NOTEBOOKS = rendered - -# Git repo -GITHUB_REPO = https://github.com/uds-se/$(PROJECT)/ -BINDER_URL = https://mybinder.org/v2/gh/uds-se/$(PROJECT)/master?filepath=docs/beta/notebooks/00_Table_of_Contents.ipynb -PROJECT_URL = https://beta.$(PROJECT).org - -# Sources in the notebooks folder -SOURCES = $(SOURCE_FILES:%=$(NOTEBOOKS)/%) -CHAPTER_SOURCES = $(CHAPTERS:%=$(NOTEBOOKS)/%) -ALL_CHAPTER_SOURCES = $(CHAPTERS:%=$(NOTEBOOKS)/%) -PUBLIC_SOURCES = $(PUBLIC_CHAPTERS:%=$(NOTEBOOKS)/%) -READY_SOURCES = $(READY_CHAPTERS:%=$(NOTEBOOKS)/%) -TODO_SOURCES = $(TODO_CHAPTERS:%=$(NOTEBOOKS)/%) 
-NEW_SOURCES = $(NEW_CHAPTERS:%=$(NOTEBOOKS)/%) -APPENDICES_SOURCES = $(APPENDICES:%=$(NOTEBOOKS)/%) - -# Where to place the pdf, html, slides -PDF_TARGET = pdf/ -NBPDF_TARGET = nbpdf/ -HTML_TARGET = html/ -SLIDES_TARGET = slides/ -CODE_TARGET = code/ -MYPY_TARGET = mypy/ -MARKDOWN_TARGET = markdown/ -WORD_TARGET = word/ -EPUB_TARGET = epub/ -DEPEND_TARGET = .depend/ -DOCS_TARGET = docs/ - -# If BETA=y, we create files in the "beta" subdir. Use 'make docs-beta', 'make html-beta' to invoke -ifdef BETA -DOCS_TARGET := docs/beta/ -HTML_TARGET := beta/$(HTML_TARGET) -SLIDES_TARGET := beta/$(SLIDES_TARGET) -CODE_TARGET := beta/$(CODE_TARGET) -BETA_FLAG = --include-ready --include-todo -endif -ifndef BETA -# Avoid warning: undefined variable `BETA_FLAG' -BETA_FLAG = -endif - -# Files to appear in the table of contents -ifndef BETA -CHAPTER_SOURCES := $(PUBLIC_CHAPTERS:%=$(NOTEBOOKS)/%) -endif -ifdef BETA -PUBLIC_CHAPTERS := $(CHAPTERS) -endif -TOC_CHAPTERS := $(PUBLIC_CHAPTERS) -TOC_APPENDICES = $(APPENDICES) - -# Files to appear on the Web page -DOCS = \ - $(FRONTMATTER:%.ipynb=%) \ - $(TOC_CHAPTERS:%.ipynb=%) \ - $(APPENDICES:%.ipynb=%) \ - $(EXTRAS:%.ipynb=%) - - -# Various derived files -TEXS = $(SOURCE_FILES:%.ipynb=$(PDF_TARGET)%.tex) -PDFS = $(SOURCE_FILES:%.ipynb=$(PDF_TARGET)%.pdf) -NBPDFS = $(SOURCE_FILES:%.ipynb=$(NBPDF_TARGET)%.pdf) -HTMLS = $(SOURCE_FILES:%.ipynb=$(HTML_TARGET)%.html) -SLIDES = $(SOURCE_FILES:%.ipynb=$(SLIDES_TARGET)%.slides.html) -PYS = $(SOURCE_FILES:%.ipynb=$(CODE_TARGET)%.py) \ - $(CODE_TARGET)setup.py \ - $(CODE_TARGET)__init__.py -MYPYS = $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET)%.py) -WORDS = $(SOURCE_FILES:%.ipynb=$(WORD_TARGET)%.docx) -MARKDOWNS = $(SOURCE_FILES:%.ipynb=$(MARKDOWN_TARGET)%.md) -EPUBS = $(SOURCE_FILES:%.ipynb=$(EPUB_TARGET)%.epub) -FULLS = $(FULL_NOTEBOOKS)/$(UTILS) \ - $(UTILITY_FILES:%=$(FULL_NOTEBOOKS)/$(UTILS)/%) \ - $(SOURCE_FILES:%.ipynb=$(FULL_NOTEBOOKS)/%.ipynb) -RENDERS = $(SOURCE_FILES:%.ipynb=$(RENDERED_NOTEBOOKS)/%.ipynb) - -DEPENDS = $(SOURCE_FILES:%.ipynb=$(DEPEND_TARGET)%.makefile) - -CHAPTER_PYS = $(CHAPTERS:%.ipynb=$(CODE_TARGET)%.py) - -PDF_FILES = $(SOURCE_FILES:%.ipynb=$(PDF_TARGET)%_files) -NBPDF_FILES = $(SOURCE_FILES:%.ipynb=$(NBPDF_TARGET)%_files) -HTML_FILES = $(SOURCE_FILES:%.ipynb=$(HTML_TARGET)%_files) -SLIDES_FILES = $(SOURCE_FILES:%.ipynb=$(SLIDES_TARGET)%_files) - -SITEMAP_SVG = $(NOTEBOOKS)/PICS/Sitemap.svg - - -# Configuration -# The site -SITE = https://www.$(PROJECT).org - -# What we use for production: nbpublish (preferred), bookbook, or nbconvert -PUBLISH ?= nbpublish - -# What we use for LaTeX: latexmk (preferred), or pdflatex -LATEX ?= latexmk - -## Tools -# Python -PYTHON ?= python3 - -# Jupyter -JUPYTER ?= jupyter - -# The nbpublish tool (preferred; https://github.com/chrisjsewell/ipypublish) -# (see nbpublish -h for details) -NBPUBLISH ?= nbpublish -NBPUBLISH_OPTIONS ?= -log warning - -# The bookbook tool (okay for chapters and books; but no citations yet) -# https://github.com/takluyver/bookbook -BOOKBOOK_LATEX ?= $(PYTHON) -m bookbook.latex -BOOKBOOK_HTML ?= $(PYTHON) -m bookbook.html - -# The nbconvert alternative (okay for chapters; doesn't work for book; no citations) -NBCONVERT ?= $(JUPYTER) nbconvert -NBCONVERT_OPTIONS ?= --log-level=WARN - -# Notebook merger -NBMERGE = $(PYTHON) utils/nbmerge.py - -# LaTeX -PDFLATEX ?= pdflatex -XELATEX ?= xelatex -BIBTEX ?= bibtex -LATEXMK ?= latexmk -LATEXMK_OPTS ?= -xelatex -quiet -f -interaction=nonstopmode - -# Word -PANDOC ?= pandoc - -# Markdown (see 
https://github.com/aaren/notedown) -NOTEDOWN ?= notedown - -# Style checks -PYCODESTYLE ?= pycodestyle -PYCODESTYLE_CFG = code/pycodestyle.cfg - -AUTOPEP8 ?= autopep8 -AUTOPEP8_CFG = code/autopep8.cfg -AUTOPEP8_OPTIONS = --global-config $(AUTOPEP8_CFG) --aggressive --in-place -NBAUTOPEP8 = $(PYTHON) utils/nbautopep8.py - -# Program to open files after creating, say OPEN=open (default: ignore; "true" does nothing) -OPEN ?= true - -# Make directory -MKDIR = mkdir -p - -ifndef PUBLISH -# Determine publishing program -OUT := $(shell which $(NBPUBLISH) > /dev/null && echo yes) -ifeq ($(OUT),yes) -# We have nbpublish -PUBLISH = nbpublish -else -# Issue a warning message -OUT := $(shell $(NBPUBLISH) -h > /dev/null) -# We have nbconvert -PUBLISH = nbconvert -PUBLISH_PLUGINS = -endif -endif - -ifndef LATEX -# Determine publishing program -OUT := $(shell which $(LATEXMK) > /dev/null && echo yes) -ifeq ($(OUT),yes) -# We have latexmk -LATEX = $(LATEXMK) -else -# Issue a warning message -OUT := $(shell $(LATEXMK) -h > /dev/null) -# We have pdflatex -LATEX = $(PDFLATEX) -endif -endif - - -# Book base name -BOOK = $(PROJECT) - -ifeq ($(PUBLISH),bookbook) -# Use bookbook -CONVERT_TO_HTML = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to html --output-dir=$(HTML_TARGET) -CONVERT_TO_TEX = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to latex --template $(PROJECT).tplx --output-dir=$(PDF_TARGET) -BOOK_TEX = $(PDF_TARGET)$(BOOK).tex -BOOK_PDF = $(PDF_TARGET)$(BOOK).pdf -BOOK_HTML = $(HTML_TARGET)$(BOOK).html -BOOK_HTML_FILES = $(HTML_TARGET)$(BOOK)_files -BOOK_PDF_FILES = $(PDF_TARGET)$(BOOK)_files -PUBLISH_PLUGINS = -else -ifeq ($(PUBLISH),nbpublish) -# Use nbpublish -CONVERT_TO_HTML = $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f html_ipypublish_chapter --outpath $(HTML_TARGET) -CONVERT_TO_TEX = $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f latex_ipypublish_chapter --outpath $(PDF_TARGET) -# CONVERT_TO_SLIDES = $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f slides_ipypublish_all --outpath $(SLIDES_TARGET) -BOOK_TEX = $(PDF_TARGET)$(BOOK).tex -BOOK_PDF = $(PDF_TARGET)$(BOOK).pdf -BOOK_HTML = $(HTML_TARGET)$(BOOK).html -BOOK_HTML_FILES = $(HTML_TARGET)$(BOOK)_files -BOOK_PDF_FILES = $(PDF_TARGET)$(BOOK)_files -PUBLISH_PLUGINS = \ - ipypublish_plugins/html_ipypublish_chapter.py \ - ipypublish_plugins/latex_ipypublish_book.py \ - ipypublish_plugins/latex_ipypublish_chapter.py -else -# Use standard Jupyter tools -CONVERT_TO_HTML = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to html --output-dir=$(HTML_TARGET) -CONVERT_TO_TEX = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to latex --template $(PROJECT).tplx --output-dir=$(PDF_TARGET) -# CONVERT_TO_SLIDES = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to slides --output-dir=$(SLIDES_TARGET) -BOOK_TEX = -BOOK_PDF = -BOOK_HTML = -BOOK_HTML_FILES = -BOOK_PDF_FILES = -PUBLISH_PLUGINS = -endif -endif - -# For Python, we use our own script that takes care of distinguishing -# main (script) code from definitions to be imported -EXPORT_NOTEBOOK_CODE = $(NOTEBOOKS)/$(UTILS)/export_notebook_code.py -CONVERT_TO_PYTHON = $(PYTHON) $(EXPORT_NOTEBOOK_CODE) - -# This would be the Jupyter alternative -# CONVERT_TO_PYTHON = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to python --output-dir=$(CODE_TARGET) - -# For slides, we use the standard Jupyter tools -# Main reason: Jupyter has a neat interface to control slides/sub-slides/etc -CONVERT_TO_SLIDES = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to slides --output-dir=$(SLIDES_TARGET) -REVEAL_JS = $(SLIDES_TARGET)reveal.js - -# For Word .docx files, we start from the HTML version -CONVERT_TO_WORD = $(PANDOC) - -# For 
Markdown .md files, we use markdown -# Note: adding --run re-executes all code -# CONVERT_TO_MARKDOWN = $(NOTEDOWN) --to markdown -CONVERT_TO_MARKDOWN = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to markdown --output-dir=$(MARKDOWN_TARGET) - -# Run -# fuzzingbook/WhenToStopFuzzing needs about 120 seconds to render -# debuggingbook/Tracing may need up to 10 minutes -EXECUTE_TIMEOUT ?= 140 -EXECUTE_OPTIONS ?= --ExecutePreprocessor.timeout=$(EXECUTE_TIMEOUT) -EXECUTE_NOTEBOOK = $(TIME) $(NBCONVERT) $(NBCONVERT_OPTIONS) $(EXECUTE_OPTIONS) --to notebook --execute --output-dir=$(FULL_NOTEBOOKS) - -# Render -RENDER_NOTEBOOK = RENDER_HTML=1 $(NBCONVERT) $(NBCONVERT_OPTIONS) $(EXECUTE_OPTIONS) --to notebook --execute --output-dir=$(RENDERED_NOTEBOOKS) - - -# Zip -ZIP ?= zip -ZIP_OPTIONS = -r - - -# Short targets -# Default target is to build everything needed for publishing, -# such that we can run "make -k" in a loop -.PHONY: chapters web default -web default: html code test-code test-imports test-packages test-types slides -chapters: html - -# The book is recreated after any change to any source -.PHONY: book all and more -book $(PROJECT): book-html book-pdf -all: chapters pdf code slides book -and more: word markdown epub - -# Individual targets -.PHONY: html pdf python code slides word doc docx md markdown epub -.PHONY: full-notebooks full fulls rendered-notebooks rendered renders book-pdf book-html -html: ipypublish-chapters $(HTMLS) -pdf: ipypublish-chapters $(PDFS) -nbpdf: ipypublish-chapters $(NBPDFS) -python code: $(PYS) -slides: $(SLIDES) -word doc docx: $(WORDS) -md markdown: $(MARKDOWNS) -epub: $(EPUBS) -full-notebooks full fulls: $(FULLS) -rendered-notebooks rendered renders: $(RENDERS) - -book-pdf $(PROJECT)-pdf: ipypublish-book $(BOOK_PDF) -book-html $(PROJECT)-html: ipypublish-book $(BOOK_HTML) - -.PHONY: ipypublish-book ipypublish-chapters -ifeq ($(PUBLISH),bookbook) -ipypublish-book: -ipypublish-chapters: -else -ifeq ($(PUBLISH),nbpublish) -ipypublish-book: -ipypublish-chapters: -else -ipypublish-book: - @echo "To create the book, you need the 'nbpublish' program." - @echo "This is part of the 'ipypublish' package" - @echo "at https://github.com/chrisjsewell/ipypublish" -ipypublish-chapters: - @echo "Warning: Using '$(NBCONVERT)' instead of '$(NBPUBLISH)'" - @echo "Documents will be created without citations and references" - @echo "Install the 'ipypublish' package" - @echo "from https://github.com/chrisjsewell/ipypublish" -endif -endif - -.PHONY: edit jupyter lab notebook -# Invoke notebook and editor: `make jupyter lab` -edit notebook: - $(JUPYTER) notebook - -lab: - $(JUPYTER) lab - -jupyter: - - -# Help -.PHONY: help -help: - @echo "Welcome to the '$(PROJECT)' Makefile!" 
- @echo "" - @echo "* make chapters (default) -> HTML and code for all chapters (notebooks)" - @echo "* make (pdf|html|code|slides|word|markdown) -> given subcategory only" - @echo "* make book -> entire book in PDF and HTML" - @echo "* make all -> all inputs in all output formats" - @echo "* make reformat -> reformat notebook Python code according to PEP8 guidelines" - @echo "* make style -> style checker" - @echo "* make crossref -> cross reference checker" - @echo "* make stats -> report statistics" - @echo "* make clean -> delete all derived files" - @echo "" - @echo "Created files end here:" - @echo "* PDFs -> '$(PDF_TARGET)', HTML -> '$(HTML_TARGET)', Python code -> '$(CODE_TARGET)', Slides -> '$(SLIDES_TARGET)'" - @echo "* Web site files -> '$(DOCS_TARGET)'" - @echo "" - @echo "Publish:" - @echo "* make docs -> Create public version of current documents" - @echo "* make beta -> Create beta version of current documents" - @echo "* make publish-all -> Add docs to git, preparing for publication" - @echo "" - @echo "Settings:" - @echo "* Use make PUBLISH=(nbconvert|nbpublish|bookbook) to choose a converter" - @echo " (default: automatic)" - -# Run a notebook, (re)creating all output cells -ADD_METADATA = $(SHARED)utils/add_metadata.py -NBAUTOSLIDE = $(SHARED)utils/nbautoslide.py -NBSYNOPSIS = $(SHARED)utils/nbsynopsis.py -NBSHORTEN = $(SHARED)utils/nbshorten.py - -COMMIT_SYNOPSIS = -git commit -m "Update synopsis" $(NOTEBOOKS)/PICS/*synopsis* - -$(FULL_NOTEBOOKS)/%.ipynb: $(NOTEBOOKS)/%.ipynb $(DEPEND_TARGET)%.makefile $(ADD_METADATA) $(NBAUTOSLIDE) $(NBSYNOPSIS) - $(EXECUTE_NOTEBOOK) $< - $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $@ > $@~ && mv $@~ $@ - $(PYTHON) $(NBAUTOSLIDE) --in-place $@ - $(PYTHON) $(NBSYNOPSIS) --project $(PROJECT) --update $@ - $(COMMIT_SYNOPSIS) - -$(RENDERED_NOTEBOOKS)/%.ipynb: $(NOTEBOOKS)/%.ipynb $(DEPEND_TARGET)%.makefile $(ADD_METADATA) $(SHARED)$(NBAUTOSLIDE) $(SHARED)$(NBSYNOPSIS) $(SHARED)$(NBSHORTEN) $(NOTEBOOKS)/$(UTILS)/__init__.py - $(RENDER_NOTEBOOK) $< - $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $@ > $@~ && mv $@~ $@ - $(PYTHON) $(NBAUTOSLIDE) --in-place $@ - RENDER_HTML=1 $(PYTHON) $(NBSYNOPSIS) --project $(PROJECT) --update $@ - $(COMMIT_SYNOPSIS) - $(PYTHON) $(NBSHORTEN) --link-to "$(SITE)/html/" --in-place $@ - -$(FULL_NOTEBOOKS)/$(UTILS): - $(MKDIR) $(FULL_NOTEBOOKS)/$(UTILS) - -$(FULL_NOTEBOOKS)/$(UTILS)/%: $(NOTEBOOKS)/$(UTILS)/% - @test -d $(FULL_NOTEBOOKS)/$(UTILS) || \ - $(MKDIR) $(FULL_NOTEBOOKS)/$(UTILS) - cp -pr $< $@ - - - -# Conversion rules - chapters -ifeq ($(LATEX),pdflatex) -# Use PDFLaTeX -$(PDF_TARGET)%.pdf: $(PDF_TARGET)%.tex $(BIB) - @echo Running LaTeX... - @-test -L $(PDF_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(PDF_TARGET) - cd $(PDF_TARGET) && $(PDFLATEX) $* - -cd $(PDF_TARGET) && $(BIBTEX) $* - cd $(PDF_TARGET) && $(PDFLATEX) $* - cd $(PDF_TARGET) && $(PDFLATEX) $* - @cd $(PDF_TARGET) && $(RM) $*.aux $*.bbl $*.blg $*.log $*.out $*.toc $*.frm $*.lof $*.lot $*.fls - @cd $(PDF_TARGET) && $(RM) -r $*_files - @echo Created $@ - @$(OPEN) $@ -else -# Use LaTeXMK -$(PDF_TARGET)%.pdf: $(PDF_TARGET)%.tex $(BIB) - @echo Running LaTeXMK... 
- @-test -L $(PDF_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(PDF_TARGET) - cd $(PDF_TARGET) && $(LATEXMK) $(LATEXMK_OPTS) $* - @cd $(PDF_TARGET) && $(RM) $*.aux $*.bbl $*.blg $*.log $*.out $*.toc $*.frm $*.lof $*.lot $*.fls $*.fdb_latexmk $*.xdv - @echo Created $@ - @$(OPEN) $@ -endif - -# Keep the .tex files -.PRECIOUS: $(PDF_TARGET)%.tex - -POST_TEX = utils/post_tex - -$(PDF_TARGET)%.tex: $(RENDERED_NOTEBOOKS)/%.ipynb $(BIB) $(PUBLISH_PLUGINS) $(SHARED)$(ADD_METADATA) $(SHARED)$(POST_TEX) - $(eval TMPDIR := $(shell mktemp -d)) - $(PYTHON) $(ADD_METADATA) --project $(PROJECT) --titlepage $< > $(TMPDIR)/$(notdir $<) - cp -pr $(NOTEBOOKS)/PICS $(BIB) $(TMPDIR) - $(CONVERT_TO_TEX) $(TMPDIR)/$(notdir $<) - $(POST_TEX) $@ > $@~ && mv $@~ $@ - @-$(RM) -fr $(TMPDIR) - @cd $(PDF_TARGET) && $(RM) $*.nbpub.log - - -POST_HTML_OPTIONS = $(BETA_FLAG) \ - --project="$(PROJECT)" \ - --title="$(BOOKTITLE)" \ - --authors="$(AUTHORS)" \ - --twitter="$(TWITTER)" \ - --all-chapters="$(ALL_CHAPTER_SOURCES) $(APPENDICES_SOURCES)" \ - --public-chapters="$(CHAPTER_SOURCES) $(APPENDICES_SOURCES)" \ - --ready-chapters="$(READY_SOURCES)" \ - --todo-chapters="$(TODO_SOURCES)" \ - --new-chapters="$(NEW_SOURCES)" - -HTML_DEPS = $(BIB) $(SHARED)$(PUBLISH_PLUGINS) $(SHARED)utils/post_html.py $(CHAPTERS_MAKEFILE) $(BIBCHECK) - -# Check bib -BIBER = biber -BIBCHECK = .$(BIB).ascii .$(BIB).python .$(BIB).biber -checkbib check-bib: $(BIBCHECK) - @echo "Check completed; $(BIB) is ok" - -check-bib-ascii: .$(BIB).ascii -.$(BIB).ascii: $(BIB) - @echo "Checking $(BIB) for 7-bit ASCII encoding" - @if grep -Hn '[^[:print:]]' fuzzingbook.bib; then false; fi - @touch $@ - -check-bib-python: .$(BIB).python -.$(BIB).python: $(BIB) - @echo "Checking $(BIB) for Python usage with bibtexparser" - @$(PYTHON) -W error -c 'import bibtexparser; fd = open("$(BIB)"); bibtexparser.load(fd); fd.close()' - @touch $@ - -check-bib-biber: .$(BIB).biber -.$(BIB).biber: $(BIB) - @echo "Checking $(BIB) for LaTeX usage with Biber" - @$(BIBER) --tool --validate-datamodel --quiet $(BIB) - @$(RM) fuzzingbook_bibertool.bib fuzzingbook.bib.blg - @touch .$(BIB).biber - -.PHONY: checkbib check-bib check-bib-ascii check-bib-python check-bib-biber - - -# index.html comes with relative links (html/) such that the beta version gets the beta menu -$(DOCS_TARGET)index.html: \ - $(FULL_NOTEBOOKS)/index.ipynb $(HTML_DEPS) - @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) - @test -d $(HTML_TARGET) || $(MKDIR) $(HTML_TARGET) - $(CONVERT_TO_HTML) $< - mv $(HTML_TARGET)index.html $@ - @cd $(HTML_TARGET) && $(RM) -r index.nbpub.log index_files - $(PYTHON) $(SHARED)utils/post_html.py --menu-prefix=html/ --home $(POST_HTML_OPTIONS)$(HOME_POST_HTML_OPTIONS) $@ - @$(OPEN) $@ - -# 404.html comes with absolute links (/html/) such that it works anywhere -# https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/ -$(DOCS_TARGET)404.html: $(FULL_NOTEBOOKS)/404.ipynb $(HTML_DEPS) - @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) - @test -d $(HTML_TARGET) || $(MKDIR) $(HTML_TARGET) - $(CONVERT_TO_HTML) $< - mv $(HTML_TARGET)404.html $@ - @cd $(HTML_TARGET) && $(RM) -r 404.nbpub.log 404_files - $(PYTHON) $(SHARED)utils/post_html.py --menu-prefix=/html/ --home $(POST_HTML_OPTIONS) $@ - (echo '---'; echo 'permalink: /404.html'; echo '---'; cat $@) > $@~ && mv $@~ $@ - @$(OPEN) $@ - -$(DOCS_TARGET)html/00_Index.html: $(DOCS_TARGET)notebooks/00_Index.ipynb $(HTML_DEPS) - $(CONVERT_TO_HTML) $< - @cd $(HTML_TARGET) && $(RM) -r 00_Index.nbpub.log
00_Index_files - @cd $(DOCS_TARGET)html && $(RM) -r 00_Index.nbpub.log 00_Index_files - mv $(HTML_TARGET)00_Index.html $@ - $(PYTHON) $(SHARED)utils/post_html.py $(POST_HTML_OPTIONS) $@ - -$(DOCS_TARGET)html/00_Table_of_Contents.html: $(DOCS_TARGET)notebooks/00_Table_of_Contents.ipynb $(SITEMAP_SVG) - $(CONVERT_TO_HTML) $< - @cd $(HTML_TARGET) && $(RM) -r 00_Table_of_Contents.nbpub.log 00_Table_of_Contents_files - @cd $(DOCS_TARGET)html && $(RM) -r 00_Table_of_Contents.nbpub.log 00_Table_of_Contents_files - mv $(HTML_TARGET)00_Table_of_Contents.html $@ - $(PYTHON) $(SHARED)utils/post_html.py $(POST_HTML_OPTIONS) $@ - @$(OPEN) $@ - -$(HTML_TARGET)%.html: $(FULL_NOTEBOOKS)/%.ipynb $(HTML_DEPS) - @test -d $(HTML_TARGET) || $(MKDIR) $(HTML_TARGET) - $(CONVERT_TO_HTML) $< - @cd $(HTML_TARGET) && $(RM) $*.nbpub.log $*_files/$(BIB) - $(PYTHON) $(SHARED)utils/post_html.py $(POST_HTML_OPTIONS) $@ - @-test -L $(HTML_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(HTML_TARGET) - @$(OPEN) $@ - -$(SLIDES_TARGET)%.slides.html: $(FULL_NOTEBOOKS)/%.ipynb $(BIB) $(NBSHORTEN) - @test -d $(SLIDES_TARGET) || $(MKDIR) $(SLIDES_TARGET) - $(eval TMPDIR := $(shell mktemp -d)) - sed 's/\.ipynb)/\.slides\.html)/g' $< > $(TMPDIR)/$(notdir $<) - $(PYTHON) $(NBSHORTEN) --skip-slides --in-place $(TMPDIR)/$(notdir $<) - $(CONVERT_TO_SLIDES) $(TMPDIR)/$(notdir $<) - @cd $(SLIDES_TARGET) && $(RM) $*.nbpub.log $*_files/$(BIB) - @-test -L $(HTML_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(HTML_TARGET) - @-$(RM) -fr $(TMPDIR) - @$(OPEN) $@ - - -# Rules for beta targets -.FORCE: -ifndef BETA -beta/%: .FORCE - @$(MAKE) BETA=beta $(@:beta/=) - -$(DOCS_TARGET)beta/%: .FORCE - @$(MAKE) BETA=beta $(@:beta/=) - -%-beta: .FORCE - @$(MAKE) BETA=beta $(@:-beta=) - -%-all: % %-beta - @true - -.PHONY: beta -beta: default-beta -else: -beta: -endif - - -# Reconstructing the reveal.js dir -.PHONY: reveal.js -$(REVEAL_JS) reveal.js: .FORCE - @-test -d "$@" || (cd $(SLIDES_TARGET); \ - git submodule add https://github.com/hakimel/reveal.js.git) - @git submodule update --remote - -$(CODE_TARGET)setup.py: $(CODE_TARGET)setup.py.in - cat $< > $@ - chmod +x $@ - -$(CODE_TARGET)__init__.py: $(CODE_TARGET)__init__.py.in $(CHAPTERS_MAKEFILE) - cat $< > $@ - (for module in $(IMPORTS); do echo from . 
import $$module; done) | grep -v '^.*[0-9][0-9]_.*' >> $@ - chmod +x $@ - -# For code, we comment out fuzzingbook/debuggingbook imports, -# ensuring we import a .py and not the .ipynb file -$(CODE_TARGET)%.py: $(FULL_NOTEBOOKS)/%.ipynb $(EXPORT_NOTEBOOK_CODE) - @test -d $(CODE_TARGET) || $(MKDIR) $(CODE_TARGET) - $(CONVERT_TO_PYTHON) --project $(PROJECT) $< > $@~ && mv $@~ $@ - # $(AUTOPEP8) $(AUTOPEP8_OPTIONS) $@ - -chmod +x $@ - -$(MYPY_TARGET)%.py: $(NOTEBOOKS)/%.ipynb $(EXPORT_NOTEBOOK_CODE) - @test -d $(MYPY_TARGET) || $(MKDIR) $(MYPY_TARGET) - $(CONVERT_TO_PYTHON) --project $(PROJECT) --mypy $< > $@~ && mv $@~ $@ - -# Markdown -$(MARKDOWN_TARGET)%.md: $(RENDERED_NOTEBOOKS)/%.ipynb $(BIB) - $(RM) -r $(MARKDOWN_TARGET)$(basename $(notdir $<)).md $(MARKDOWN_TARGET)$(basename $(notdir $<))_files - $(CONVERT_TO_MARKDOWN) $< - -# For word, we convert from the HTML file -$(WORD_TARGET)%.docx: $(HTML_TARGET)%.html $(WORD_TARGET)pandoc.css - $(PANDOC) --css=$(WORD_TARGET)pandoc.css $< -o $@ - -# Epub comes from the markdown file -$(EPUB_TARGET)%.epub: $(MARKDOWN_TARGET)%.md - cd $(MARKDOWN_TARGET); $(PANDOC) -o ../$@ ../$< - - -# NBPDF files - generated from HTML, with embedded notebooks -# See instructions at https://github.com/betatim/notebook-as-pdf -HTMLTONBPDF = $(SHARED)utils/htmltonbpdf.py - -$(NBPDF_TARGET)%.pdf: $(HTML_TARGET)/%.html $(RENDERED_NOTEBOOKS)/%.ipynb $(HTMLTONBPDF) $(HTML_TARGET)custom.css - @test -d $(NBPDF_TARGET) || $(MKDIR) $(NBPDF_TARGET) - $(PYTHON) $(HTMLTONBPDF) --attach --fix-html-links $${PWD}/$(HTML_TARGET)$(basename $(notdir $<)).html $(RENDERED_NOTEBOOKS)/$(basename $(notdir $<)).ipynb $@ - sed "s!$(HTML_TARGET)!$(NBPDF_TARGET)!g" $@ > $@~ && mv $@~ $@ - - -# Conversion rules - entire book -# We create a fuzzingbook/ or debuggingbook/ folder -# with the chapters ordered by number, -# and let the fuzzingbook converters run on this -ifeq ($(PUBLISH),nbpublish) -# With nbpublish -$(PDF_TARGET)$(BOOK).tex: $(RENDERS) $(BIB) $(PUBLISH_PLUGINS) $(CHAPTERS_MAKEFILE) - -$(RM) -r $(BOOK) - $(MKDIR) $(BOOK) - chapter=0; \ - for file in $(SOURCE_FILES); do \ - chnum=$$(printf "%02d" $$chapter); \ - ln -s ../$(RENDERED_NOTEBOOKS)/$$file $(BOOK)/$$(echo $$file | sed 's/.*/Ch'$${chnum}'_&/g'); \ - chapter=$$(expr $$chapter + 1); \ - done - ln -s ../$(BIB) $(BOOK) - $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f latex_ipypublish_book --outpath $(PDF_TARGET) $(BOOK) - $(POST_TEX) $@ > $@~ && mv $@~ $@ - $(RM) -r $(BOOK) - cd $(PDF_TARGET) && $(RM) $(BOOK).nbpub.log - @echo Created $@ - -$(HTML_TARGET)$(BOOK).html: $(FULLS) $(BIB) $(SHARED)utils/post_html.py - -$(RM) -r $(BOOK) - $(MKDIR) $(BOOK) - chapter=0; \ - for file in $(SOURCE_FILES); do \ - chnum=$$(printf "%02d" $$chapter); \ - ln -s ../$(FULL_NOTEBOOKS)/$$file $(BOOK)/$$(echo $$file | sed 's/.*/Ch'$${chnum}'_&/g'); \ - chapter=$$(expr $$chapter + 1); \ - done - ln -s ../$(BIB) $(BOOK) - $(CONVERT_TO_HTML) $(BOOK) - $(PYTHON) $(SHARED)utils/nbmerge.py $(BOOK)/Ch*.ipynb > notebooks/$(BOOK).ipynb - $(PYTHON) $(SHARED)utils/post_html.py $(BETA_FLAG) $(POST_HTML_OPTIONS) $@ - $(RM) -r $(BOOK) notebooks/$(BOOK).ipynb - cd $(HTML_TARGET) && $(RM) $(BOOK).nbpub.log $(BOOK)_files/$(BIB) - @echo Created $@ -else -# With bookbook -$(PDF_TARGET)$(BOOK).tex: $(RENDERS) $(BIB) $(PUBLISH_PLUGINS) $(CHAPTERS_MAKEFILE) - -$(RM) -r $(BOOK) - $(MKDIR) $(BOOK) - chapter=0; \ - for file in $(SOURCE_FILES); do \ - chnum=$$(printf "%02d" $$chapter); \ - ln -s ../$(RENDERED_NOTEBOOKS)/$$file book/$$(echo $$file | sed 's/.*/'$${chnum}'-&/g'); \ - 
chapter=$$(expr $$chapter + 1); \ - done - cd book; $(BOOKBOOK_LATEX) - mv book/combined.tex $@ - $(POST_TEX) $@ > $@~ && mv $@~ $@ - $(RM) -r book - @echo Created $@ - -$(HTML_TARGET)book.html: $(FULLS) $(BIB) $(PUBLISH_PLUGINS) - -$(RM) -r book - $(MKDIR) book - for file in $(SOURCE_FILES); do \ - ln -s ../$(FULL_NOTEBOOKS)/$$file book/$$(echo $$file | sed 's/[^-0-9]*\([-0-9][0-9]*\)_\(.*\)/\1-\2/g'); \ - done - cd book; $(BOOKBOOK_HTML) - mv book/html/index.html $@ - mv book/html/*.html $(HTML_TARGET) - $(RM) -r book - @echo Created $@ -endif - - -## Some checks - -# Style checks -.PHONY: style check-style checkstyle -style check-style checkstyle: $(PYS) $(PYCODESTYLE_CFG) - $(PYCODESTYLE) --config $(PYCODESTYLE_CFG) $(PYS) - @echo "All style checks passed." - -# Automatic formatting -.PHONY: autopep8 reformat -autopep8 reformat: $(PYCODESTYLE_CFG) - $(NBAUTOPEP8) --split-cells --jobs -1 $(AUTOPEP8_OPTIONS) $(SOURCES) - @echo "Code reformatting complete. Use 'make full' to re-execute and test notebooks." - - -# List of Cross References -.PHONY: check-crossref crossref xref -check-crossref crossref xref: $(SOURCES) - @echo "Referenced notebooks (* = missing)" - @files=$$(grep '\.ipynb)' $(SOURCES) | sed 's/.*[(]\([a-zA-Z0-9_][a-zA-Z0-9_-]*\.ipynb\)[)].*/\1/' | grep -v http | sort | uniq); \ - for file in $$files; do \ - if [ -f $(NOTEBOOKS)/$$file ]; then \ - echo ' ' $$file; \ - else \ - echo '* ' $$file "- in" $$(cd $(NOTEBOOKS); grep -l $$file $(SOURCE_FILES)); \ - fi \ - done - - -# Stats -.PHONY: stats -stats: $(SOURCES) - @cd $(NOTEBOOKS); ../$(SHARED)utils/nbstats.py $(SOURCE_FILES) - -# Run all code. This should produce no failures. -PY_SUCCESS_MAGIC = "--- Code check passed ---" -PYS_OUT = $(SOURCE_FILES:%.ipynb=$(CODE_TARGET).%.py.out) -$(CODE_TARGET).%.py.out: $(CODE_TARGET)%.py - @echo Running $<... - @if $(PYTHON) -W error $< > $@ 2>&1; then \ - echo $(PY_SUCCESS_MAGIC) >> $@; \ - exit 0; \ - else \ - echo "Error while running $<" >> $@; \ - tail $@; \ - touch -r $< $@; \ - touch -A -010000 $@; \ - exit 1; \ - fi - -# No need to check if Tracking.py works; it's not run by users anyway -$(CODE_TARGET).Tracking.py.out: $(CODE_TARGET)Tracking.py - @echo Skipping $<... - @echo $(PY_SUCCESS_MAGIC) > $@ - -.PHONY: test-code -test-code: code $(PYS_OUT) - -.PHONY: check-code -check-code: test-code - @files_with_errors=$$(grep --files-without-match -- $(PY_SUCCESS_MAGIC) $(PYS_OUT)); \ - if [ -z "$$files_with_errors" ]; then \ - echo "All code checks passed."; \ - else \ - echo "Check these files for errors: $$files_with_errors"; \ - exit 1; \ - fi - -# Import all code. This should produce no output (or error messages). -IMPORTS = $(subst .ipynb,,$(CHAPTERS) $(APPENDICES)) -IMPORTS_OUT = $(CODE_TARGET).import_all.py.out - -.PHONY: test-import test-imports -test-import test-imports: code $(IMPORTS_OUT) - -.PHONY: check-import check-imports -check-import check-imports: test-imports - @echo "All import checks passed." - -$(IMPORTS_OUT): $(PYS) - @echo "#!/usr/bin/env $(PYTHON)" > import_all.py - @(for module in $(IMPORTS); do echo import code.$$module; done) | grep -v '^.*[0-9][0-9]_.*' >> import_all.py - $(PYTHON) import_all.py 2>&1 | tee $@ - @$(RM) import_all.py - @test ! 
-s $@ - -# Same as above, but using Python standard packages only; import should work too -check-standard-imports: code - # PYTHONPATH= $(MAKE) check-imports - -PACKAGES_OUT = $(CODE_TARGET).import_packages.py.out -.PHONY: test-packages -test-packages: $(PACKAGES_OUT) - -check-package check-packages: test-packages - @echo "Package check passed." - -$(PACKAGES_OUT): $(PYS) - @echo "#!/usr/bin/env $(PYTHON)" > import_packages.py - @(for module in $(IMPORTS); do echo import code.$$module; done) | grep -v '^import code.[0-9][0-9]' >> import_packages.py - $(PYTHON) import_packages.py 2>&1 | tee $@ - @$(RM) import_packages.py - @test ! -s $@ - - -# Static type checking -MYPY = mypy -# MYPYS = $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET)%.py) -MYPYS_OUT = $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET).%.py.out) -$(MYPY_TARGET).%.py.out: $(MYPY_TARGET)%.py $(MYPY_TARGET)/mypy.ini - @echo Type-checking $<... - @if $(MYPY) --config-file $(MYPY_TARGET)/mypy.ini $< > $@ 2>&1; then \ - echo $(PY_SUCCESS_MAGIC) >> $@; \ - exit 0; \ - else \ - echo "Error type checking $<" >> $@; \ - tail $@; \ - touch -r $< $@; \ - touch -A -010000 $@; \ - exit 1; \ - fi - -UTILS_MYPY_OUT = $(MYPY_TARGET).$(UTILS).py.out -$(UTILS_MYPY_OUT): $(UTILITY_FILES:%=$(NOTEBOOKS)/$(UTILS)/%) - @echo Type-checking $(NOTEBOOKS)/$(UTILS)... - @if $(MYPY) --config-file $(MYPY_TARGET)/mypy.ini $(NOTEBOOKS)/$(UTILS) > $@ 2>&1; then \ - echo $(PY_SUCCESS_MAGIC) >> $@; \ - exit 0; \ - else \ - echo "Error type checking $<" >> $@; \ - tail $@; \ - touch -r $< $@; \ - touch -A -010000 $@; \ - exit 1; \ - fi - -test-types: $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET)%.py) \ - $(UTILS_MYPY_OUT) $(MYPYS_OUT) - -check-types: test-types - @files_with_errors=$$(grep --files-without-match -- $(PY_SUCCESS_MAGIC) $(MYPYS_OUT) $(UTILS_MYPY_OUT)); \ - if [ -z "$$files_with_errors" ]; then \ - echo "All type checks passed."; \ - else \ - echo "Check these files for errors: $$files_with_errors"; \ - exit 1; \ - fi - -.PHONY: run -run: check-imports check-standard-imports check-package check-types check-code - -# Todo checks -check-todo todo: - @grep '\\todo' $(ALL_CHAPTER_SOURCES); \ - if [ $$? = 0 ]; then exit 1; else \ - echo "No todos in $(PUBLIC_CHAPTERS:%.ipynb=%) $(READY_CHAPTERS:%.ipynb=%)"; exit 0; fi - -# Spell checks -NBSPELLCHECK = $(SHARED)utils/nbspellcheck.py -.PHONY: spell spellcheck check-spell -spell spellcheck check-spell: - $(NBSPELLCHECK) $(SOURCES) - - -# All checks -.PHONY: check check-all -check check-all: check-import check-package check-types check-code check-style check-crossref check-todo - -# Add notebook metadata (add table of contents, bib reference, etc.) -.PHONY: metadata -metadata: $(ADD_METADATA) - @for notebook in $(SOURCES); do \ - echo "Adding metadata to $$notebook...\c"; \ - $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $$notebook > $$notebook~ || exit 1; \ - if diff $$notebook $$notebook~; then \ - echo "unchanged."; \ - else \ - mv $$notebook~ $$notebook; \ - echo "done."; \ - fi; \ - $(RM) $$notebook~; \ - done - - -## Publishing -.PHONY: docs -docs: publish-notebooks publish-index publish-html publish-code publish-dist \ - publish-slides publish-pics \ - $(DOCS_TARGET)index.html $(DOCS_TARGET)404.html README.md binder/postBuild - @echo "Now use 'make publish-all' to commit changes to docs." 
- -# github does not like script tags; -# links to notebooks need to get adapted -README.md: $(MARKDOWN_TARGET)index.md Makefile - sed 's!<script.*</script>!!g' $< | \ - sed 's!(\([_a-zA-Z0-9]*\).ipynb)!($(SITE)/html/\1.html)!g'> $@ - -.PHONY: publish -publish: run quick-publish -quick-publish: docs - git add $(DOCS_TARGET)* binder/postBuild README.md \ - $(NOTEBOOKS)/PICS/*-synopsis-* - -git status - -git commit -m "Doc update" - @echo "Now use 'make push' to place docs on website and trigger a mybinder update" - -# Add/update HTML code in Web pages -.PHONY: publish-html publish-html-setup -publish-html: html publish-html-setup \ - $(DOCS_TARGET)html/00_Index.html \ - $(DOCS_TARGET)html/00_Table_of_Contents.html \ - $(DOCS_TARGET)html/custom.css \ - $(DOCS_TARGET)html/favicon \ - $(DOCS:%=$(DOCS_TARGET)html/%.html) \ - $(DOCS:%=$(DOCS_TARGET)html/%_files) - -publish-html-setup: - @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) - @test -d $(DOCS_TARGET)html || $(MKDIR) $(DOCS_TARGET)html - -$(DOCS_TARGET)html/%: $(HTML_TARGET)% - $(RM) -r $@ - cp -pr $< $@ - -# Add/update Python code on Web pages -.PHONY: publish-code publish-code-setup -publish-code: code publish-code-setup \ - $(DOCS_TARGET)code/LICENSE.md \ - $(DOCS_TARGET)code/README.md \ - $(DOCS_TARGET)code/setup.py \ - $(DOCS_TARGET)code/__init__.py \ - $(UTILITY_FILES:%=$(DOCS_TARGET)code/$(UTILS)/%) \ - $(PUBLIC_CHAPTERS:%.ipynb=$(DOCS_TARGET)code/%.py) \ - $(APPENDICES:%.ipynb=$(DOCS_TARGET)code/%.py) - -publish-code-setup: - @test -d $(DOCS_TARGET) \ - || $(MKDIR) $(DOCS_TARGET) - @test -d $(DOCS_TARGET)code \ - || $(MKDIR) $(DOCS_TARGET)code - @test -d $(DOCS_TARGET)code/$(UTILS) \ - || $(MKDIR) $(DOCS_TARGET)code/$(UTILS) - -$(DOCS_TARGET)code/%: $(CODE_TARGET)% - cp -pr $< $@ - -.PHONY: dist publish-dist -dist publish-dist: check-import check-package check-code publish-code toc \ - $(DOCS_TARGET)dist/$(PROJECT)-code.zip \ - $(DOCS_TARGET)dist/$(PROJECT)-notebooks.zip - -DIST_CODE_FILES = \ - $(DOCS_TARGET)code/README.md \ - $(DOCS_TARGET)code/LICENSE.md \ - $(DOCS_TARGET)code/setup.py \ - $(DOCS_TARGET)code/__init__.py - -check-install: - $(eval TMPDIR := $(shell mktemp -d)) - @cd $(TMPDIR); \ - $(PYTHON) -c 'import $(PROJECT)' 2> /dev/null; \ - if [ $$? = 0 ]; then \ - echo "Error: Installed $(PROJECT) package conflicts with package creation" >&2; \ - echo "Please uninstall it; e.g. with 'pip uninstall $(PROJECT)'." 
>&2; \ - exit 1; \ - else \ - exit 0; \ - fi - -clean-dist: - $(RM) -r code/__pycache__ - $(RM) -r code/$(UTILS)/__pycache__ - $(RM) -r $(DOCS_TARGET)notebooks/$(PROJECT)/__pycache__ - $(RM) -r $(DOCS_TARGET)notebooks/$(UTILS)/__pycache__ - $(RM) -r $(DOCS_TARGET)code/$(UTILS)/__pycache__ - $(RM) -r $(DOCS_TARGET)notebooks/.ipynb_checkpoints - -$(DOCS_TARGET)dist/$(PROJECT)-code.zip: \ - $(PYS) $(DIST_CODE_FILES) $(CHAPTERS_MAKEFILE) \ - check-install clean-dist - @-mkdir $(DOCS_TARGET)dist - $(RM) -r $(DOCS_TARGET)dist/* - $(RM) -r $(DOCS_TARGET)$(PROJECT) - mkdir $(DOCS_TARGET)$(PROJECT) - ln -s ../code $(DOCS_TARGET)$(PROJECT)/$(PROJECT) - mv $(DOCS_TARGET)$(PROJECT)/$(PROJECT)/setup.py $(DOCS_TARGET)$(PROJECT) - mv $(DOCS_TARGET)$(PROJECT)/$(PROJECT)/README.md $(DOCS_TARGET)$(PROJECT) - cd $(DOCS_TARGET)$(PROJECT); PYTHONPATH= $(PYTHON) ./setup.py sdist - mv $(DOCS_TARGET)$(PROJECT)/dist/* $(DOCS_TARGET)dist - # mv $(DOCS_TARGET)$(PROJECT)/*.egg-info $(DOCS_TARGET)dist - $(RM) -r $(DOCS_TARGET)$(PROJECT)/*.egg-info - $(RM) -r $(DOCS_TARGET)$(PROJECT)/dist $(DOCS_TARGET)$(PROJECT)/build - cd $(DOCS_TARGET); $(ZIP) $(ZIP_OPTIONS) $(PROJECT)-code.zip $(PROJECT) - mv $(DOCS_TARGET)$(PROJECT)-code.zip $(DOCS_TARGET)dist - $(RM) -r $(DOCS_TARGET)$(PROJECT) $(DOCS_TARGET)code/$(PROJECT) - $(RM) -r $(DOCS_TARGET)code/dist $(DOCS_TARGET)code/*.egg-info - @echo "Created code distribution files in $(DOCS_TARGET)dist" - -$(DOCS_TARGET)dist/$(PROJECT)-notebooks.zip: $(FULLS) $(CHAPTERS_MAKEFILE) \ - clean-dist - cd $(DOCS_TARGET); ln -s notebooks $(PROJECT)-notebooks - cd $(DOCS_TARGET); \ - $(ZIP) $(ZIP_OPTIONS) $(PROJECT)-notebooks.zip $(PROJECT)-notebooks - $(RM) $(DOCS_TARGET)/$(PROJECT)-notebooks - cd $(DOCS_TARGET); \ - for file in $(EXTRAS); do \ - $(ZIP) $(PROJECT)-notebooks.zip -d $(PROJECT)-notebooks/$$file; \ - done - mv $(DOCS_TARGET)$(PROJECT)-notebooks.zip $@ - @echo "Created notebook distribution files in $(DOCS_TARGET)dist" - - -# Add/update slides on Web pages -.PHONY: publish-slides publish-slides-setup -publish-slides: slides publish-slides-setup \ - $(PUBLIC_CHAPTERS:%.ipynb=$(DOCS_TARGET)slides/%.slides.html) \ - $(APPENDICES:%.ipynb=$(DOCS_TARGET)slides/%.slides.html) \ - $(REVEAL_JS) $(DOCS_TARGET)slides/reveal.js - @-rm -fr $(DOCS_TARGET)slides/.git - -publish-slides-setup: - @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) - @test -d $(DOCS_TARGET)slides || $(MKDIR) $(DOCS_TARGET)slides - -$(DOCS_TARGET)slides/%: $(SLIDES_TARGET)% - -rm -fr $@ - cp -pr $< $@ - - -# Add/update notebooks on Web pages -.PHONY: publish-notebooks publish-notebooks-setup -publish-notebooks: full-notebooks publish-notebooks-setup \ - $(DOCS_TARGET)notebooks/custom.css \ - $(DOCS_TARGET)notebooks/$(BIB) \ - $(DOCS_TARGET)notebooks/LICENSE.md \ - $(DOCS_TARGET)notebooks/README.md \ - $(DOCS:%=$(DOCS_TARGET)notebooks/%.ipynb) \ - $(UTILITY_FILES:%=$(DOCS_TARGET)notebooks/$(UTILS)/%) - -publish-notebooks-setup: - @test -d $(DOCS_TARGET) \ - || $(MKDIR) $(DOCS_TARGET) - @test -d $(DOCS_TARGET)notebooks \ - || $(MKDIR) $(DOCS_TARGET)notebooks - @test -d $(DOCS_TARGET)notebooks/$(UTILS) \ - || $(MKDIR) $(DOCS_TARGET)notebooks/$(UTILS) - -$(DOCS_TARGET)notebooks/%: $(FULL_NOTEBOOKS)/% - cp -pr $< $@ - -.PHONY: publish-index -publish-index: $(DOCS_TARGET)notebooks/00_Index.ipynb - - -# Add/update pics on Web pages -.PHONY: publish-pics publish-pics-setup -publish-pics: publish-pics-setup $(NOTEBOOKS)/PICS - cp -pr $(NOTEBOOKS)/PICS $(DOCS_TARGET)notebooks - -publish-pics-setup: - @test -d $(DOCS_TARGET) 
|| $(MKDIR) $(DOCS_TARGET) - @test -d $(DOCS_TARGET)PICS || $(MKDIR) $(DOCS_TARGET)PICS - $(RM) -fr $(DOCS_TARGET)html/PICS; ln -s ../$(NOTEBOOKS)/PICS $(DOCS_TARGET)html - $(RM) -fr $(DOCS_TARGET)slides/PICS; ln -s ../$(NOTEBOOKS)/PICS $(DOCS_TARGET)slides - - -# Table of contents -.PHONY: toc -toc: $(DOCS_TARGET)notebooks/00_Table_of_Contents.ipynb -$(DOCS_TARGET)notebooks/00_Table_of_Contents.ipynb: $(SHARED)utils/nbtoc.py \ - $(TOC_CHAPTERS:%=$(DOCS_TARGET)notebooks/%) \ - $(TOC_APPENDICES:%=$(DOCS_TARGET)notebooks/%) \ - $(CHAPTERS_MAKEFILE) \ - $(SITEMAP_SVG) - $(RM) $@ - $(PYTHON) $(SHARED)utils/nbtoc.py \ - --title="$(BOOKTITLE)" \ - --chapters="$(TOC_CHAPTERS:%=$(DOCS_TARGET)notebooks/%)" \ - --appendices="$(TOC_APPENDICES:%=$(DOCS_TARGET)notebooks/%)" > $@ - $(EXECUTE_NOTEBOOK) $@ && mv $(FULL_NOTEBOOKS)/00_Table_of_Contents.ipynb $@ - $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $@ > $@~ && mv $@~ $@ - $(JUPYTER) trust $@ - @$(OPEN) $@ - - -# Index -.PHONY: index -index: $(DOCS_TARGET)notebooks/00_Index.ipynb $(DOCS_TARGET)/html/00_Index.html -$(DOCS_TARGET)notebooks/00_Index.ipynb: $(SHARED)utils/nbindex.py \ - $(TOC_CHAPTERS:%=$(DOCS_TARGET)notebooks/%) \ - $(TOC_APPENDICES:%=$(DOCS_TARGET)notebooks/%) \ - $(CHAPTERS_MAKEFILE) - (cd $(NOTEBOOKS); $(PYTHON) ../$(SHARED)utils/nbindex.py $(TOC_CHAPTERS) $(APPENDICES)) > $@ - @$(OPEN) $@ - -## Synopsis -update-synopsis synopsis: - $(PYTHON) $(NBSYNOPSIS) --project $(PROJECT) --update $(ALL_CHAPTER_SOURCES) - $(COMMIT_SYNOPSIS) - -no-synopsis: - @echo Chapters without synopsis: - @grep -L '## Synopsis' $(ALL_CHAPTER_SOURCES) | grep -v '[0-9]' - - -## Python packages -# After this, you can do 'pip install fuzzingbook / debuggingbook' -# and then 'from fuzzingbook.Fuzzer import Fuzzer' :-) -.PHONY: upload-dist -upload-dist: dist - @echo "Use your pypi.org password to upload" - cd $(DOCS_TARGET); twine upload dist/*.tar.gz - - - -## Binder services -# Make sure we have our custom.css in Binder, too -binder/postBuild: binder/postBuild.template $(HTML_TARGET)custom.css - cat binder/postBuild.template $(HTML_TARGET)custom.css > $@ - echo END >> $@ - chmod +x $@ - -# Force recreation of binder service; avoids long waiting times for first user -.PHONY: binder -binder: .FORCE - open $(BINDER_URL) - -# After a git push, we want binder to update; "make push" does this -.PHONY: push -push: .FORCE - git push - open $(BINDER_URL) - open $(PROJECT_URL) - -# Debugging binder -# This is the same system as mybinder uses, but should be easier to debug -# See https://repo2docker.readthedocs.io/en/latest/ -.PRECIOUS: binder/binder.log -.PHONY: binder-local debug-binder -binder-local debug-binder: binder/binder.log binder/postBuild -binder/binder.log: .FORCE - @echo Writing output to $@ - @docker version > /dev/null - jupyter-repo2docker --debug $(GITHUB_REPO) 2>&1 | tee $@ - - -## Docker services (experimental) -docker: - docker pull $(PROJECT)/student - -docker run -d -p 8888:8888 --name fuzzing-book-instance $(PROJECT)/student - -docker-start: - docker start fuzzing-book-instance - sleep 2 - @URL=$$(docker exec -it fuzzing-book-instance jupyter notebook list | grep http | awk '{ print $$1 }'); echo $$URL; open $$URL - -docker-stop: - docker stop fuzzing-book-instance - - -## Getting rid of stray processes and workspaces -kill: - -pkill -HUP -l -f jupyter-lab Firefox.app firefox-bin runserver - $(RM) $$HOME/.jupyter/lab/workspaces/*.jupyterlab-workspace - -## Cleanup -AUX = *.aux *.bbl *.blg *.log *.out *.toc *.frm *.lof *.lot *.fls *.fdb_latexmk \ 
- $(PDF_TARGET)*.aux \ - $(PDF_TARGET)*.bbl \ - $(PDF_TARGET)*.blg \ - $(PDF_TARGET)*.log \ - $(PDF_TARGET)*.out \ - $(PDF_TARGET)*.toc \ - $(PDF_TARGET)*.frm \ - $(PDF_TARGET)*.lof \ - $(PDF_TARGET)*.lot \ - $(PDF_TARGET)*.fls \ - $(PDF_TARGET)*.xdv \ - $(PDF_TARGET)*.fdb_latexmk - -.PHONY: clean-code clean-chapters clean-book clean-aux clean-pdf -clean-code: - $(RM) $(PYS) $(PYS_OUT) - -clean-chapters: - $(RM) $(TEXS) $(PDFS) $(HTMLS) $(SLIDES) $(WORDS) $(MARKDOWNS) - $(RM) -r $(PDF_FILES) $(HTML_FILES) $(SLIDES_FILES) - -clean-book: - $(RM) $(BOOK_TEX) $(BOOK_PDF) $(BOOK_HTML) - $(RM) -r $(BOOK_HTML_FILES) $(BOOK_PDF_FILES) - -clean-aux clean-pdf: - $(RM) $(AUX) - -.PHONY: clean-full-notebooks clean-full clean-fulls -.PHONY: clean-rendered-notebooks clean-rendered clean-renders -.PHONY: clean-docs clean realclean -clean-full-notebooks clean-full clean-fulls: - $(RM) $(FULLS) - -clean-rendered-notebooks clean-rendered clean-renders: - $(RM) $(RENDERS) - -clean-docs: - $(RM) -r $(DOCS_TARGET)html $(DOCS_TARGET)code \ - $(DOCS_TARGET)slides $(DOCS_TARGET)index.html $(DOCS_TARGET)404.html \ $(DOCS_TARGET)PICS $(DOCS_TARGET)notebooks - -clean: clean-code clean-chapters clean-book clean-aux clean-docs clean-fulls clean-renders - @echo "All derived files deleted" - -realclean: clean - cd $(PDF_TARGET); $(RM) *.pdf - cd $(HTML_TARGET); $(RM) *.html; $(RM) -r *_files - cd $(SLIDES_TARGET); $(RM) *.html - cd $(CODE_TARGET); $(RM) *.py *.py.out .*.py.out - cd $(MYPY_TARGET); $(RM) *.py *.py.out .*.py.out - cd $(WORD_TARGET); $(RM) *.docx - cd $(MARKDOWN_TARGET); $(RM) *.md - @echo "All old files deleted" - - -## A bit of Makefile debugging -# See http://www.drdobbs.com/tools/debugging-makefiles/197003338# - -# Use "make print-VAR" to see the value of VAR, e.g. "make print-NBDEPEND" -print-%: ; @$(error $* = $($*) (defined as $* = $(value $*) from $(origin $*))) - -# Use "make DEBUG=1" to get better diagnostics why a command gets executed -ifdef DEBUG -OLD_SHELL := $(SHELL) -SHELL = $(warning creating $@ from $^: $? 
is newer)$(OLD_SHELL) -endif - - -## Dependencies as graph -NBDEPEND = $(SHARED)utils/nbdepend.py -SITEMAP_OPTIONS = --graph --transitive-reduction --project $(PROJECT) # --cluster-by-parts - -sitemap: $(SITEMAP_SVG) -$(SITEMAP_SVG): $(CHAPTER_SOURCES) $(NBDEPEND) - $(PYTHON) $(NBDEPEND) $(SITEMAP_OPTIONS) $(CHAPTER_SOURCES) > $@~ && mv $@~ $@ - @$(OPEN) $@ - -$(HTML_TARGET)/Tours.html: $(SITEMAP_SVG) -$(FULL_NOTEBOOKS)/Tours.ipynb: $(SITEMAP_SVG) -$(RENDERED_NOTEBOOKS)/Tours.ipynb: $(SITEMAP_SVG) - -$(HTML_TARGET)/00_Table_of_Contents.html: $(SITEMAP_SVG) -$(FULL_NOTEBOOKS)/00_Table_of_Contents.ipynb: $(SITEMAP_SVG) -$(RENDERED_NOTEBOOKS)/00_Table_of_Contents.ipynb: $(SITEMAP_SVG) - - -## Dependencies - should come at the very end -# See http://make.mad-scientist.net/papers/advanced-auto-dependency-generation/ for inspiration -$(DEPEND_TARGET)%.makefile: $(NOTEBOOKS)/%.ipynb - @echo "Rebuilding $@" - @test -d $(DEPEND_TARGET) || $(MKDIR) $(DEPEND_TARGET) - @for import in $$($(PYTHON) $(NBDEPEND) $<); do \ - if [ -f $(NOTEBOOKS)/$$import.ipynb ]; then \ - notebooks="$$notebooks $$""(NOTEBOOKS)/$$import.ipynb"; \ - imports="$$imports $$""(CODE_TARGET)$$import.py"; \ - mypys="$$mypys $$""(MYPY_TARGET)$$import.py"; \ - fi; \ - done; \ - ( \ - echo '# $(basename $(notdir $<)) dependencies'; \ - echo ''; \ - echo '$$''(FULL_NOTEBOOKS)/$(notdir $<):' $$notebooks; \ - echo ''; \ - echo '$$''(RENDERED_NOTEBOOKS)/$(notdir $<):' $$notebooks; \ - echo ''; \ - echo '$$''(CODE_TARGET).$(notdir $(<:%.ipynb=.%.py.out)):' $$imports; \ - echo ''; \ - echo '$$''(MYPY_TARGET).$(notdir $(<:%.ipynb=.%.py.out)):' $$mypys; \ - ) > $@ - - -.PHONY: depend -depend: $(DEPENDS) - -include $(wildcard $(DEPENDS)) diff --git a/Makefile b/Makefile new file mode 120000 index 000000000..5f6170eb4 --- /dev/null +++ b/Makefile @@ -0,0 +1 @@ +notebooks/shared/Makefile \ No newline at end of file diff --git a/fuzzingbook.bib b/fuzzingbook.bib deleted file mode 100644 index 7a3e36415..000000000 --- a/fuzzingbook.bib +++ /dev/null @@ -1,1756 +0,0 @@ -@comment{ fuzzingbook bibliography } -@comment{ All entries must have a 'url' entry the HTML version can link to! } - -@comment{ Define common abbreviations for non-BibTeX conversion } -@string{ jan = "January" } -@string{ feb = "February" } -@string{ mar = "March" } -@string{ apr = "April" } -@string{ may = "May" } -@string{ jun = "June" } -@string{ jul = "July" } -@string{ aug = "August" } -@string{ sep = "September" } -@string{ oct = "October" } -@string{ nov = "November" } -@string{ dec = "December" } - -@article{Purdom1972, - year={1972}, - issn={0006-3835}, - journal={BIT Numerical Mathematics}, - volume={12}, - number={3}, - doi={10.1007/BF01932308}, - title={A sentence generator for testing parsers}, - url={http://dx.doi.org/10.1007/BF01932308}, - publisher={Kluwer Academic Publishers}, - author={Purdom, Paul}, - pages={366-375}, - language={English} -} - -@article{Miller1990, - author = {Miller, Barton P. and Fredriksen, Louis and So, Bryan}, - title = {An Empirical Study of the Reliability of {UNIX} Utilities}, - journal = {Commun. ACM}, - issue_date = {Dec. 
1990}, - volume = {33}, - number = {12}, - month = dec, - year = {1990}, - issn = {0001-0782}, - pages = {32--44}, - numpages = {13}, - url = {http://doi.acm.org/10.1145/96267.96279}, - doi = {10.1145/96267.96279}, - acmid = {96279}, - publisher = {ACM}, - address = {New York, NY, USA} -} - -@book{Pezze2008, - title={Software Testing and Analysis: Process, Principles, and Techniques}, - author={Pezz{\`e}, Mauro and Young, Michal}, - year={2008}, - publisher={John Wiley \& Sons}, - url={http://ix.cs.uoregon.edu/~michal/book/}, -} - -@article{Luke2000, - author = {Luke, S.}, - title = {Two Fast Tree-creation Algorithms for Genetic Programming}, - journal = {Transactions on Evolutionary Computation}, - issue_date = {September 2000}, - volume = {4}, - number = {3}, - month = sep, - year = {2000}, - issn = {1089-778X}, - pages = {274--283}, - numpages = {10}, - url = {https://doi.org/10.1109/4235.873237}, - doi = {10.1109/4235.873237}, - acmid = {2221499}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, -} - -@book{fuzzingbook, - author = {Andreas Zeller and Rahul Gopinath and Marcel B{\"o}hme and Gordon Fraser and Christian Holler}, - booktitle = {The Fuzzing Book}, - title = {The Fuzzing Book}, - howpublished = {\url{https://www.fuzzingbook.org/}}, - note = {Retrieved 2019-09-09 13:49:23+02:00}, - url = {https://www.fuzzingbook.org/}, - urldate = {2019-09-09 13:49:23+02:00} -} - -@Article{Burkhardt1967, -author="Burkhardt, W. H.", -title="Generating test programs from syntax", -journal="Computing", -year="1967", -month="Mar", -day="01", -volume="2", -number="1", -pages="53--73", -abstract="The many faces of programming and systems development demand an immense amount of mechanical routine work. The present paper tries to explain some areas where automation of many tasks may be of great help. One special area, where progress seems to lag behind unduly, can be found in debugging, testing, and diagnosing systems. Here we attempted the generation of programs automatically from a definition of a problem and the characteristics of programs for its solution by a software system, which has been specially designed for this purpose. It has been indicated how the ideas underlying this project may be applied successfully to other areas.", -issn="1436-5057", -doi="10.1007/BF02235512", -url="https://doi.org/10.1007/BF02235512" -} - -@inproceedings{Slutz1998, - author = {Slutz, Donald R.}, - title = {Massive Stochastic Testing of SQL}, - booktitle = {Proceedings of the 24th International Conference on Very Large Data Bases}, - series = {VLDB '98}, - year = {1998}, - isbn = {1-55860-566-5}, - pages = {618--622}, - numpages = {5}, - original_url = {http://dl.acm.org/citation.cfm?id=645924.671199}, - url = {https://www.microsoft.com/en-us/research/publication/massive-stochastic-testing-of-sql/}, - acmid = {671199}, - publisher = {Morgan Kaufmann Publishers Inc.}, - address = {San Francisco, CA, USA}, -} - -@article{Zeller2002, - author = {Zeller, Andreas and Hildebrandt, Ralf}, - title = {Simplifying and Isolating Failure-Inducing Input}, - journal = {IEEE Trans. Softw. 
Eng.}, - issue_date = {February 2002}, - volume = {28}, - number = {2}, - month = feb, - year = {2002}, - issn = {0098-5589}, - pages = {183--200}, - numpages = {18}, - url = {http://dx.doi.org/10.1109/32.988498}, - doi = {10.1109/32.988498}, - acmid = {506206}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, - keywords = {Automated debugging, debugging aids, testing tools, combinatorial testing, diagnostics, tracing.}, -} - -@book{Kernighan1999, - author = {Kernighan, Brian W. and Pike, Rob}, - title = {The Practice of Programming}, - year = {1999}, - isbn = {0-201-61586-X}, - publisher = {Addison-Wesley Longman Publishing Co., Inc.}, - address = {Boston, MA, USA}, -} - -@book{Panini350bce, -author = {Dak{\d{s}}iputra P{\=a}{\d{n}}ini}, -title = {Ash{\d{t}}{\=a}dhy{\=a}y{\=i}}, -publisher = {Sanskrit Oral Tradition}, -year = {350 BCE}, -url = {https://en.wikipedia.org/wiki/P%C4%81%E1%B9%87ini%23A%E1%B9%A3%E1%B9%AD%C4%81dhy%C4%81y%C4%AB}, -urldate = {2018-10-10 12:15:00+02:00} -} - -@article{Petke2015, -author={J. Petke and M. B. Cohen and M. Harman and S. Yoo}, -journal={IEEE Transactions on Software Engineering}, -title={Practical Combinatorial Interaction Testing: Empirical Findings on Efficiency and Early Fault Detection}, -year={2015}, -volume={41}, -number={9}, -pages={901-924}, -keywords={genetic algorithms;greedy algorithms;program testing;simulated annealing;software fault tolerance;combinatorial interaction testing;early fault detection;software system configuration space;simulated annealing;SA;greedy algorithm;CIT test suite generation;constraint handling;pairwise testing;genetic algorithm;Testing;Simulated annealing;Genetic algorithms;Fault detection;Greedy algorithms;Turning;Flexible printed circuits;Combinatorial Interaction Testing;Prioritisation;Empirical Studies;Software Testing;Combinatorial interaction testing;prioritisation;empirical studies;software testing}, -doi={10.1109/TSE.2015.2421279}, -ISSN={0098-5589}, -month={Sept},} - -@inproceedings{Herfert2017, - author = {Herfert, Satia and Patra, Jibesh and Pradel, Michael}, - title = {Automatically Reducing Tree-structured Test Inputs}, - booktitle = {Proceedings of the 32nd IEEE/ACM International Conference on Automated Software Engineering}, - series = {ASE 2017}, - year = {2017}, - isbn = {978-1-5386-2684-9}, - location = {Urbana-Champaign, IL, USA}, - pages = {861--871}, - numpages = {11}, - url = {http://dl.acm.org/citation.cfm?id=3155562.3155669}, - acmid = {3155669}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, -} - -@article{redziejowski2008, - author = {Redziejowski, Roman R.}, - title = {Some Aspects of Parsing Expression Grammar}, - journal = {Fundam. Inf.}, - issue_date = {January 2008}, - volume = {85}, - number = {1-4}, - month = jan, - year = {2008}, - issn = {0169-2968}, - pages = {441--451}, - numpages = {11}, - url = {http://dl.acm.org/citation.cfm?id=2365896.2365924}, - acmid = {2365924}, - publisher = {IOS Press}, - address = {Amsterdam, The Netherlands, The Netherlands}, -} - -@article{Valiant1975, - author = {Valiant, Leslie G.}, - title = {General Context-free Recognition in Less Than Cubic Time}, - journal = {J. Comput. Syst. 
Sci.}, - issue_date = {April, 1975}, - volume = {10}, - number = {2}, - month = apr, - year = {1975}, - issn = {0022-0000}, - pages = {308--315}, - numpages = {8}, - url = {http://dx.doi.org/10.1016/S0022-0000(75)80046-8}, - doi = {10.1016/S0022-0000(75)80046-8}, - acmid = {1740048}, - publisher = {Academic Press, Inc.}, - address = {Orlando, FL, USA}, -} - -@article{Lee2002, - author = {Lee, Lillian}, - title = {Fast Context-free Grammar Parsing Requires Fast Boolean Matrix Multiplication}, - journal = {J. ACM}, - issue_date = {January 2002}, - volume = {49}, - number = {1}, - month = jan, - year = {2002}, - issn = {0004-5411}, - pages = {1--15}, - numpages = {15}, - url = {http://doi.acm.org/10.1145/505241.505242}, - doi = {10.1145/505241.505242}, - acmid = {505242}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Boolean matrix multiplication, context-free grammar parsing}, -} - -@inproceedings{LeGall2014, - author = {Le Gall, Fran\c{c}ois}, - title = {Powers of Tensors and Fast Matrix Multiplication}, - booktitle = {Proceedings of the 39th International Symposium on Symbolic and Algebraic Computation}, - series = {ISSAC '14}, - year = {2014}, - isbn = {978-1-4503-2501-1}, - location = {Kobe, Japan}, - pages = {296--303}, - numpages = {8}, - url = {http://doi.acm.org/10.1145/2608628.2608664}, - doi = {10.1145/2608628.2608664}, - acmid = {2608664}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {algebraic complexity theory, matrix multiplication}, -} - -@article{Hopcroft2001, - title={Introduction to automata theory, languages, and computation}, - author={Hopcroft, John E and Motwani, Rajeev and Ullman, Jeffrey D}, - journal={ACM SIGACT News}, - volume={32}, - number={1}, - pages={60--65}, - year={2001}, - publisher={ACM} -} - -@book{Myers2004, - author = {Myers, Glenford J. and Sandler, Corey}, - title = {The Art of Software Testing}, - year = {2004}, - isbn = {0471469122}, - publisher = {John Wiley \& Sons, Inc.}, - url = {https://dl.acm.org/citation.cfm?id=983238}, - address = {USA}, -} - -@book{Beizer1990, - author = {Beizer, Boris}, - title = {Software Testing Techniques}, - year = {1990}, - isbn = {0442245920}, - publisher = {John Wiley \& Sons, Inc.}, - url = {https://dl.acm.org/citation.cfm?id=79060}, - address = {New York, NY, USA}, -} - -@book{Sutton2007, - author = {Sutton, Michael and Greene, Adam and Amini, Pedram}, - title = {Fuzzing: Brute Force Vulnerability Discovery}, - year = {2007}, - isbn = {0321446119}, - url = {http://www.fuzzing.org/}, - publisher = {Addison-Wesley Professional}, -} - -@book{Takanen2008, - author = {Takanen, Ari and DeMott, Jared and Miller, Charlie}, - title = {Fuzzing for Software Security Testing and Quality Assurance}, - year = {2008}, - isbn = {1596932147, 9781596932142}, - edition = {1}, - publisher = {Artech House, Inc.}, - url = {http://us.artechhouse.com/Fuzzing-for-Software-Security-Testing-and-Quality-Assurance-Second-Edition-P1930.aspx}, - address = {Norwood, MA, USA}, -} - -@article{Dai2010, - author = {Dai, Huning and Murphy, Christian and Kaiser, Gail}, - title = {{CONFU}: Configuration Fuzzing Testing Framework for Software Vulnerability Detection}, - journal = {Int. J. Secur. Softw. 
Eng.}, - issue_date = {July 2010}, - volume = {1}, - number = {3}, - month = jul, - year = {2010}, - issn = {1947-3036}, - pages = {41--55}, - numpages = {15}, - url = {http://dx.doi.org/10.4018/jsse.2010070103}, - doi = {10.4018/jsse.2010070103}, - acmid = {2441117}, - publisher = {IGI Global}, - address = {Hershey, PA, USA}, - keywords = {Configuration Fuzzing, Fuzz Testing, In Vivo Testing, Security Invariants, Vulnerability}, -} - -@article{Earley1970, - author = {Earley, Jay}, - title = {An Efficient Context-free Parsing Algorithm}, - journal = {Commun. ACM}, - issue_date = {Feb 1970}, - volume = {13}, - number = {2}, - month = feb, - year = {1970}, - issn = {0001-0782}, - pages = {94--102}, - numpages = {9}, - url = {http://doi.acm.org/10.1145/362007.362035}, - doi = {10.1145/362007.362035}, - acmid = {362035}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {compilers, computational complexity, context-free grammar, parsing, syntax analysis}, -} - -@article{Aycock2002, - title={Practical Earley Parsing}, - author={John Aycock and R. Nigel Horspool}, - journal={The Computer Journal}, - year={2002}, - volume={45}, - pages={620-630} -} - -@article{Leo1991, -title = "A general context-free parsing algorithm running in linear time on every {LR(k)} grammar without using lookahead", -journal = "Theoretical Computer Science", -volume = "82", -number = "1", -pages = "165 - 176", -year = "1991", -issn = "0304-3975", -doi = "https://doi.org/10.1016/0304-3975(91)90180-A", -url = "http://www.sciencedirect.com/science/article/pii/030439759190180A", -author = "Joop M.I.M. Leo" -} - -@inproceedings{Elbaum2006, - author = {Elbaum, Sebastian and Chin, Hui Nee and Dwyer, Matthew B. and Dokulil, Jonathan}, - title = {Carving Differential Unit Test Cases from System Test Cases}, - booktitle = {Proceedings of the 14th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, - series = {SIGSOFT '06/FSE-14}, - year = {2006}, - isbn = {1-59593-468-5}, - location = {Portland, Oregon, USA}, - pages = {253--264}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/1181775.1181806}, - doi = {10.1145/1181775.1181806}, - acmid = {1181806}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {automated test generation, carving and replay, regression testing}, -} - -@inproceedings{Lin2008, - author = {Lin, Zhiqiang and Zhang, Xiangyu}, - title = {Deriving Input Syntactic Structure from Execution}, - booktitle = {Proceedings of the 16th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, - series = {SIGSOFT '08/FSE-16}, - year = {2008}, - isbn = {978-1-59593-995-1}, - location = {Atlanta, Georgia}, - pages = {83--93}, - numpages = {11}, - url = {http://doi.acm.org/10.1145/1453101.1453114}, - doi = {10.1145/1453101.1453114}, - acmid = {1453114}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {bottom-up grammar, control dependence, input lineage, reverse engineering, syntax tree, top-down grammar}, -} -@article{Ford2004, - author = {Ford, Bryan}, - title = {Parsing Expression Grammars: A Recognition-based Syntactic Foundation}, - journal = {SIGPLAN Not.}, - issue_date = {January 2004}, - volume = {39}, - number = {1}, - month = jan, - year = {2004}, - issn = {0362-1340}, - pages = {111--122}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/982962.964011}, - doi = {10.1145/982962.964011}, - acmid = {964011}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {BNF, GTDPL, TDPL, context-free 
grammars, lexical analysis, packrat parsing, parsing expression grammars, regular expressions, scannerless parsing, syntactic predicates, unified grammars}, -} -@article{Ford2002, - author = {Ford, Bryan}, - title = {Packrat Parsing: Simple, Powerful, Lazy, Linear Time, Functional Pearl}, - journal = {SIGPLAN Not.}, - issue_date = {September 2002}, - volume = {37}, - number = {9}, - month = sep, - year = {2002}, - issn = {0362-1340}, - pages = {36--47}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/583852.581483}, - doi = {10.1145/583852.581483}, - acmid = {581483}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Haskell, backtracking, lexical analysis, memoization, parser combinators, scannerless parsing, top-down parsing}, -} - -@inproceedings{Holler2012, - author = {Holler, Christian and Herzig, Kim and Zeller, Andreas}, - title = {Fuzzing with Code Fragments}, - booktitle = {Proceedings of the 21st USENIX Conference on Security Symposium}, - series = {Security'12}, - year = {2012}, - location = {Bellevue, WA}, - pages = {38--38}, - numpages = {1}, - url = {https://www.usenix.org/system/files/conference/usenixsecurity12/sec12-final73.pdf}, - acmid = {2362831}, - publisher = {USENIX Association}, - address = {Berkeley, CA, USA}, -} - -@article{Newcomb1881, - author = {Simon Newcomb}, - title = {Note on the frequency of use of the different digits in natural numbers}, - journal = {American Journal of Mathematics}, - volume = {4}, - number = {1--4}, - pages = {39--40}, - year = {1881}, - url = {http://www.jstor.org/stable/2369148}, -} - -@article{Benford1938, - author = {Frank Benford}, - title = {The Law of Anomalous Numbers}, - journal = {Proceedings of the American Philosophical Society}, - volume = {78}, - number = {4}, - pages = {551--572}, - month = mar, - year = {1938}, - url = {http://links.jstor.org/sici?sici=0003-049X%2819380331%2978%3A4%3C551%3ATLOAN%3E2.0.CO%3B2-G}, -} - -@article{Chomsky1956, - author = {Chomsky, Noam}, - title = {Three models for the description of language}, - journal = {IRE Transactions on Information Theory}, - pages = {113--124}, - volume = 2, - year = 1956, - url = {https://chomsky.info/wp-content/uploads/195609-.pdf} -} - -@article{Hanford1970, - author = {Hanford, Kenneth V.}, - title = {Automatic Generation of Test Cases}, - journal = {IBM Syst.
J.}, - issue_date = {December 1970}, - volume = {9}, - number = {4}, - month = dec, - year = {1970}, - issn = {0018-8670}, - pages = {242--257}, - numpages = {16}, - url = {http://dx.doi.org/10.1147/sj.94.0242}, - doi = {10.1147/sj.94.0242}, - acmid = {1663480}, - publisher = {IBM Corp.}, - address = {Riverton, NJ, USA}, -} - -@inproceedings{Yang2011, - author = {Yang, Xuejun and Chen, Yang and Eide, Eric and Regehr, John}, - title = {Finding and Understanding Bugs in {C} Compilers}, - booktitle = {Proceedings of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation}, - series = {PLDI '11}, - year = {2011}, - isbn = {978-1-4503-0663-8}, - location = {San Jose, California, USA}, - pages = {283--294}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/1993498.1993532}, - doi = {10.1145/1993498.1993532}, - acmid = {1993532}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {automated testing, compiler defect, compiler testing, random program generation, random testing}, -} - -@inproceedings{Le2014, - author = {Le, Vu and Afshari, Mehrdad and Su, Zhendong}, - title = {Compiler Validation via Equivalence Modulo Inputs}, - booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming Language Design and Implementation}, - series = {PLDI '14}, - year = {2014}, - isbn = {978-1-4503-2784-8}, - location = {Edinburgh, United Kingdom}, - pages = {216--226}, - numpages = {11}, - url = {http://doi.acm.org/10.1145/2594291.2594334}, - doi = {10.1145/2594291.2594334}, - acmid = {2594334}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {automated testing, compiler testing, equivalent program variants, miscompilation}, -} - -@book{Aho2006, - author = {Aho, Alfred V. and Lam, Monica S. and Sethi, Ravi and Ullman, Jeffrey D.}, - title = {Compilers: Principles, Techniques, and Tools (2nd edition)}, - year = {2006}, - isbn = {0321486811}, - publisher = {Addison-Wesley Longman Publishing Co., Inc.}, - url = {https://www.pearson.com/us/higher-education/program/Aho-Compilers-Principles-Techniques-and-Tools-2nd-Edition/PGM167067.html}, - address = {Boston, MA, USA}, -} - - -@inproceedings{Hodovan2018, - title = {Grammarinator: A Grammar-based Open Source Fuzzer}, - author = {Hodov{\'a}n, Ren{\'a}ta and Kiss, {\'A}kos and Gyim{\'o}thy, Tibor}, - booktitle = {Proceedings of the 9th Workshop on Automating Test Case Design, Selection and Evaluation (A-TEST 2018)}, - year = {2018}, - month = nov, - url = {https://www.researchgate.net/publication/328510752_Grammarinator_a_grammar-based_open_source_fuzzer}, - address = {Lake Buena Vista, Florida, USA}, -} - -@article{ogden1968helpful, - title={A helpful result for proving inherent ambiguity}, - author={Ogden, William}, - journal={Mathematical systems theory}, - volume={2}, - number={3}, - pages={191--194}, - year={1968}, - publisher={Springer} -} - -@article{scott2010gll, - title={GLL parsing}, - author={Scott, Elizabeth and Johnstone, Adrian}, - journal={Electronic Notes in Theoretical Computer Science}, - volume={253}, - number={7}, - pages={177--189}, - year={2010}, - publisher={Elsevier} -} - -@book{tomita2012generalized, - title={Generalized LR parsing}, - author={Tomita, Masaru}, - year={2012}, - publisher={Springer Science \& Business Media} -} - -@article{tomita1987efficient, - title={An efficient augmented-context-free parsing algorithm}, - author={Tomita, Masaru}, - journal={Computational Linguistics}, - volume={13}, - number={1-2}, - pages={31--46}, - year={1987}, - publisher={MIT
Press} -} - -@book{grune2008parsing, - title={Parsing Techniques: A Practical Guide}, - author={Grune, Dick and Jacobs, Ceriel JH}, - edition={2}, - publisher={Springer}, - year={2008} -} - -@inproceedings{pingali2015graphical, - title={A Graphical Model for Context-Free Grammar Parsing}, - author={Pingali, Keshav and Bilardi, Gianfranco}, - booktitle={International Conference on Compiler Construction}, - pages={3--27}, - year={2015}, - organization={Springer} -} - -@article{qi2018generalized, - title={Generalized Earley Parser: Bridging Symbolic Grammars and Sequence Data for Future Prediction}, - author={Qi, Siyuan and Jia, Baoxiong and Zhu, Song-Chun}, - journal={arXiv preprint arXiv:1806.03497}, - year={2018} -} - -@article{bar1961formal, - title={On formal properties of simple phrase structure grammars}, - author={Bar-Hillel, Yehoshua and Perles, Micha and Shamir, Eli}, - journal={STUF-Language Typology and Universals}, - volume={14}, - number={1-4}, - pages={143--172}, - year={1961}, - publisher={AKADEMIE VERLAG} -} - -@techreport{Patra2016, - title={Learning to fuzz: Application-independent fuzz testing with probabilistic, generative models of input data}, - author={Patra, Jibesh and Pradel, Michael}, - institution = {TU Darmstadt, Department of Computer Science}, - number = {TUD-CS-2016-14664}, - url = {http://mp.binaervarianz.de/TreeFuzz_TR_Nov2016.pdf}, - year={2016} -} - -@inproceedings{Claessen2000, - author = {Claessen, Koen and Hughes, John}, - title = {QuickCheck: A Lightweight Tool for Random Testing of Haskell Programs}, - booktitle = {Proceedings of the Fifth ACM SIGPLAN International Conference on Functional Programming}, - series = {ICFP '00}, - year = {2000}, - isbn = {1-58113-202-6}, - pages = {268--279}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/351240.351266}, - doi = {10.1145/351240.351266}, - acmid = {351266}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@inproceedings{Misherghi2006, - author = {Misherghi, Ghassan and Su, Zhendong}, - title = {{HDD}: Hierarchical Delta Debugging}, - booktitle = {Proceedings of the 28th International Conference on Software Engineering}, - series = {ICSE '06}, - year = {2006}, - isbn = {1-59593-375-1}, - location = {Shanghai, China}, - pages = {142--151}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1134285.1134307}, - doi = {10.1145/1134285.1134307}, - acmid = {1134307}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {automated debugging, delta debugging}, -} - -@inproceedings{Regehr2012, - author = {Regehr, John and Chen, Yang and Cuoq, Pascal and Eide, Eric and Ellison, Chucky and Yang, Xuejun}, - title = {Test-case Reduction for C Compiler Bugs}, - booktitle = {Proceedings of the 33rd ACM SIGPLAN Conference on Programming Language Design and Implementation}, - series = {PLDI '12}, - year = {2012}, - isbn = {978-1-4503-1205-9}, - location = {Beijing, China}, - pages = {335--346}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/2254064.2254104}, - doi = {10.1145/2254064.2254104}, - acmid = {2254104}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {automated testing, bug reporting, compiler defect, compiler testing, random testing, test-case minimization}, -} - -@techreport{Pavese2018, - author = {Esteban Pavese and Ezekiel Soremekun and Nikolas Havrikov and Lars Grunske and Andreas Zeller}, - title = {Inputs from Hell: Generating Uncommon Inputs from Common Samples}, - institution = {CISPA Helmholtz Center for Information Security}, - url =
{http://arxiv.org/abs/1812.07525}, - year={2018} -} - -@inproceedings{Hoschele2017, - author = {H{\"o}schele, Matthias and Zeller, Andreas}, - title = {Mining Input Grammars with AUTOGRAM}, - booktitle = {Proceedings of the 39th International Conference on Software Engineering Companion}, - series = {ICSE-C '17}, - year = {2017}, - isbn = {978-1-5386-1589-8}, - location = {Buenos Aires, Argentina}, - pages = {31--34}, - numpages = {4}, - url = {https://doi.org/10.1109/ICSE-C.2017.14}, - doi = {10.1109/ICSE-C.2017.14}, - acmid = {3098355}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, - keywords = {context-free grammars, dynamic tainting, fuzzing, input formats}, -} - -@techreport{Kampmann2018, - title={Carving Parameterized Unit Tests}, - institution={CISPA Helmholtz Center for Information Security}, - author={Kampmann, Alexander and Zeller, Andreas}, - journal={arXiv preprint arXiv:1812.07932}, - url={https://arxiv.org/abs/1812.07932}, - month=dec, - year={2018} -} - -@book{higuera2010grammatical, - title={Grammatical inference: learning automata and grammars}, - author={De la Higuera, Colin}, - year={2010}, - publisher={Cambridge University Press} -} - -@article{clark2013learning, - title={Learning trees from strings: A strong learning algorithm for some context-free grammars}, - author={Clark, Alexander}, - journal={The Journal of Machine Learning Research}, - volume={14}, - number={1}, - pages={3537--3559}, - year={2013}, - publisher={JMLR. org} -} - -@article{king1976symbolic, - author = {King, James C.}, - title = {Symbolic Execution and Program Testing}, - journal = {Commun. ACM}, - issue_date = {July 1976}, - volume = {19}, - number = {7}, - month = jul, - year = {1976}, - issn = {0001-0782}, - pages = {385--394}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/360248.360252}, - doi = {10.1145/360248.360252}, - acmid = {360252}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@inproceedings{wang2017angr, - title={Angr-The Next Generation of Binary Analysis}, - author={Wang, Fish and Shoshitaishvili, Yan}, - booktitle={Cybersecurity Development (SecDev), 2017 IEEE}, - pages={8--9}, - year={2017}, - organization={IEEE} -} - -@article{godefroid2012sage, - title={{SAGE}: whitebox fuzzing for security testing}, - author={Godefroid, Patrice and Levin, Michael Y and Molnar, David}, - journal={Queue}, - volume={10}, - number={1}, - pages={20}, - year={2012}, - publisher={ACM} -} - -@inproceedings{stephens2016driller, - title={Driller: Augmenting Fuzzing Through Selective Symbolic Execution.}, - author={Stephens, Nick and Grosen, John and Salls, Christopher and Dutcher, Andrew and Wang, Ruoyu and Corbetta, Jacopo and Shoshitaishvili, Yan and Kruegel, Christopher and Vigna, Giovanni}, - booktitle={NDSS}, - volume={16}, - pages={1--16}, - year={2016} -} - -@inproceedings{Memon2001, - author = {Memon, Atif M. 
and Soffa, Mary Lou and Pollack, Martha E.}, - title = {Coverage Criteria for {GUI} Testing}, - booktitle = {Proceedings of the 8th European Software Engineering Conference Held Jointly with 9th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, - series = {ESEC/FSE-9}, - year = {2001}, - isbn = {1-58113-390-1}, - location = {Vienna, Austria}, - pages = {256--267}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/503209.503244}, - doi = {10.1145/503209.503244}, - acmid = {503244}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {GUI test coverage, GUI testing, component testing, event-based coverage, event-flow graph, integration tree}, -} - -@inproceedings{Memon2003, - author = {Memon, Atif and Banerjee, Ishan and Nagarajan, Adithya}, - title = {{GUI} Ripping: Reverse Engineering of Graphical User Interfaces for Testing}, - booktitle = {Proceedings of the 10th Working Conference on Reverse Engineering}, - series = {WCRE '03}, - year = {2003}, - isbn = {0-7695-2027-8}, - pages = {260--}, - url = {http://dl.acm.org/citation.cfm?id=950792.951350}, - acmid = {951350}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@article{Mesbah2012, - author = {Mesbah, Ali and van Deursen, Arie and Lenselink, Stefan}, - title = {Crawling Ajax-Based Web Applications Through Dynamic Analysis of User Interface State Changes}, - journal = {ACM Trans. Web}, - issue_date = {March 2012}, - volume = {6}, - number = {1}, - month = mar, - year = {2012}, - issn = {1559-1131}, - pages = {3:1--3:30}, - articleno = {3}, - numpages = {30}, - url = {http://doi.acm.org/10.1145/2109205.2109208}, - doi = {10.1145/2109205.2109208}, - acmid = {2109208}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Ajax, Crawling, DOM crawling, Web 2.0, dynamic analysis, hidden web}, -} - - -@inproceedings{Conti2010, - author = {Conti, Juan Jos{\'e} and Russo, Alejandro}, - title = {A Taint Mode for Python via a Library}, - booktitle = {Proceedings of the 15th Nordic Conference on Information Security Technology for Applications}, - series = {NordSec'10}, - year = {2012}, - isbn = {978-3-642-27936-2}, - location = {Espoo, Finland}, - pages = {210--222}, - numpages = {13}, - url = {http://dx.doi.org/10.1007/978-3-642-27937-9_15}, - doi = {10.1007/978-3-642-27937-9_15}, - acmid = {2341484}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, -} - -@article{siever1999perl, - title={Perl in a Nutshell}, - author={Siever, Ellen and Spainhour, Stephen and Patwardhan, Nathan}, - year={1999}, - publisher={O'Reilly \& Associates, Inc.} -} - -@article{Barsotti2018, - title = {{PEF}: Python Error Finder}, - journal = {Electronic Notes in Theoretical Computer Science}, - volume = {339}, - pages = {21--41}, - year = {2018}, - note = {The XLII Latin American Computing Conference}, - issn = {1571-0661}, - doi = {https://doi.org/10.1016/j.entcs.2018.06.003}, - url = {http://www.sciencedirect.com/science/article/pii/S1571066118300471}, - author = {Dami{\'a}n Barsotti and Andr{\'e}s M. Bordese and Tom{\'a}s Hayes}, -} - -@techreport{PeerCheck, - title = {A peer architecture for lightweight symbolic execution}, - author = {A. Bruni and T. Disney and C. 
Flanagan}, - institution = {University of California, Santa Cruz}, - year = {2011}, - url = {https://hoheinzollern.files.wordpress.com/2008/04/seer1.pdf} -} - -@inproceedings{Larson2003, - author = {Larson, Eric and Austin, Todd}, - title = {High Coverage Detection of Input-related Security Faults}, - booktitle = {Proceedings of the 12th Conference on USENIX Security Symposium - Volume 12}, - series = {SSYM'03}, - year = {2003}, - location = {Washington, DC}, - pages = {9--9}, - numpages = {1}, - url = {http://dl.acm.org/citation.cfm?id=1251353.1251362}, - acmid = {1251362}, - publisher = {USENIX Association}, - address = {Berkeley, CA, USA}, -} - -@inproceedings{cadar2005execution, - title={Execution generated test cases: How to make systems code crash itself}, - author={Cadar, Cristian and Engler, Dawson}, - booktitle={International SPIN Workshop on Model Checking of Software}, - pages={2--23}, - year={2005}, - organization={Springer} -} - -@article{Ernst2001, - author = {Ernst, Michael D. and Cockrell, Jake and Griswold, William G. and Notkin, David}, - title = {Dynamically Discovering Likely Program Invariants to Support Program Evolution}, - journal = {IEEE Trans. Softw. Eng.}, - issue_date = {February 2001}, - volume = {27}, - number = {2}, - month = feb, - year = {2001}, - issn = {0098-5589}, - pages = {99--123}, - numpages = {25}, - doi = {10.1109/32.908957}, - acmid = {373397}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, - keywords = {Program invariants, formal specification, software evolution, dynamic analysis, execution traces, logical inference, pattern recognition.}, - url = {https://homes.cs.washington.edu/~mernst/pubs/invariants-tse2001.pdf} -} - -@inproceedings{Pacheco2005, - author = {Pacheco, Carlos and Ernst, Michael D.}, - title = {Eclat: Automatic Generation and Classification of Test Inputs}, - booktitle = {Proceedings of the 19th European Conference on Object-Oriented Programming}, - series = {ECOOP'05}, - year = {2005}, - isbn = {3-540-27992-X, 978-3-540-27992-1}, - location = {Glasgow, UK}, - pages = {504--527}, - numpages = {24}, - doi = {10.1007/11531142_22}, - acmid = {2144921}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, - url = {https://homes.cs.washington.edu/~mernst/pubs/classify-tests-ecoop2005.pdf} -} - -@inproceedings{Ammons2002, - author = {Ammons, Glenn and Bod\'{\i}k, Rastislav and Larus, James R.}, - title = {Mining Specifications}, - booktitle = {Proceedings of the 29th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages}, - series = {POPL '02}, - year = {2002}, - isbn = {1-58113-450-9}, - location = {Portland, Oregon}, - pages = {4--16}, - numpages = {13}, - url = {http://doi.acm.org/10.1145/503272.503275}, - doi = {10.1145/503272.503275}, - acmid = {503275}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@misc{lipton1971fault, - title={Fault diagnosis of computer programs}, - author={Lipton, Richard J}, - year={1971}, - publisher={Carnegie Mellon Univ., Tech.
Rep} -} - -@article{jia2011analysis, - title={An analysis and survey of the development of mutation testing}, - author={Jia, Yue and Harman, Mark}, - journal={IEEE transactions on software engineering}, - volume={37}, - number={5}, - pages={649--678}, - year={2011}, - publisher={IEEE} -} - -@incollection{papadakis2019mutation, - title={Mutation testing advances: an analysis and survey}, - author={Papadakis, Mike and Kintis, Marinos and Zhang, Jie and Jia, Yue and Le Traon, Yves and Harman, Mark}, - booktitle={Advances in Computers}, - volume={112}, - pages={275--378}, - year={2019}, - publisher={Elsevier} -} - -@article{boehme2018species, - author={B{\"o}hme, Marcel}, - journal={ACM Transactions on Software Engineering and Methodology}, - title={{STADS}: Software Testing as Species Discovery}, - issue_date = {June 2018}, - volume = {27}, - number = {2}, - month = jun, - year = {2018}, - pages = {7:1--7:52}, - articleno = {7}, - numpages = {52}, - doi = {10.1145/3210309} -} - -@article{boehme2018greybox, -author={B{\"o}hme, Marcel and Pham, Van-Thuan and Roychoudhury, Abhik}, -journal={IEEE Transactions on Software Engineering}, -title={Coverage-based Greybox Fuzzing as {Markov} Chain}, -url={https://mboehme.github.io/paper/CCS16.pdf}, -year={2018}, -pages={1-18} -} - -@inproceedings{boehme2017greybox, - author = {B{\"o}hme, Marcel and Pham, Van-Thuan and Nguyen, Manh-Dung and Roychoudhury, Abhik}, - title = {Directed Greybox Fuzzing}, - booktitle = {Proceedings of the 24th ACM Conference on Computer and Communications Security}, - series = {CCS}, - year = {2017}, - pages = {1-16}, - url = {https://mboehme.github.io/paper/CCS17.pdf}, - numpages = {16} -} - -@article{boehme2016efficiency, -author={B{\"o}hme, Marcel and Paul, Soumya}, -journal={IEEE Transactions on Software Engineering}, -title={A Probabilistic Analysis of the Efficiency of Automated Software Testing}, -year={2016}, -volume={42}, -number={4}, -pages={345-360}, -keywords={Efficient Testing;Error-based Partitioning;Partition Testing;Random Testing;Testing Theory}, -doi={10.1109/TSE.2015.2487274}, -ISSN={0098-5589}, -month={April}, -url={https://mboehme.github.io/paper/TSE15.pdf} -} - -@techreport{Pham2018aflsmart, - title={Smart Greybox Fuzzing}, - institution={National University of Singapore, Singapore and Monash University, Australia and University Politehnica of Bucharest, Romania}, - author={Van-Thuan Pham and Marcel B{\"o}hme and Andrew E. 
Santosa and Alexandru R\u{a}zvan C\u{a}ciulescu and Abhik Roychoudhury}, - journal={arXiv preprint arXiv:1811.09447}, - url={https://arxiv.org/abs/1811.09447}, - month=nov, - year={2018} -} - -@inproceedings{Wang2019superion, - title={Superion: Grammar-Aware Greybox Fuzzing}, - author={Junjie Wang and Bihuan Chen and Lei Wei and Yang Liu}, - booktitle = {Proceedings of ICSE 2019}, - year = {2019}, - url = {https://2019.icse-conferences.org/event/icse-2019-technical-papers-superion-grammar-aware-greybox-fuzzing} -} - -@inproceedings{Aschermann2019nautilus, - title={{NAUTILUS:} Fishing for Deep Bugs with Grammars}, - author={Cornelius Aschermann and Tommaso Frassetto and Thorsten Holz and Patrick Jauernig and Ahmad-Reza Sadeghi and Daniel Teuchert}, - booktitle = {Proceedings of NDSS 2019}, - year = {2019}, - url = {https://www.ndss-symposium.org/ndss-paper/nautilus-fishing-for-deep-bugs-with-grammars/} -} - -@inproceedings{Godefroid2017, - author = {Godefroid, Patrice and Peleg, Hila and Singh, Rishabh}, - title = {{Learn\&{}Fuzz}: Machine Learning for Input Fuzzing}, - booktitle = {Proceedings of the 32nd IEEE/ACM International Conference on Automated Software Engineering}, - series = {ASE 2017}, - year = {2017}, - isbn = {978-1-5386-2684-9}, - location = {Urbana-Champaign, IL, USA}, - pages = {50--59}, - numpages = {10}, - url = {http://dl.acm.org/citation.cfm?id=3155562.3155573}, - acmid = {3155573}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, - keywords = {deep learning, fuzzing, grammar learning, grammar-based fuzzing}, -} - -@inproceedings{Sun2018, - author = {Sun, Chengnian and Li, Yuanbo and Zhang, Qirun and Gu, Tianxiao and Su, Zhendong}, - title = {Perses: Syntax-guided Program Reduction}, - booktitle = {Proceedings of the 40th International Conference on Software Engineering}, - series = {ICSE '18}, - year = {2018}, - isbn = {978-1-4503-5638-1}, - location = {Gothenburg, Sweden}, - pages = {361--371}, - numpages = {11}, - url = {http://doi.acm.org/10.1145/3180155.3180236}, - doi = {10.1145/3180155.3180236}, - acmid = {3180236}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {debugging, delta debugging, program reduction}, -} - -@book{Aniche2020, - title={Software Testing: From Theory to Practice}, - author={Maur{\'i}cio Aniche and Arie van Deursen}, - year={2020}, - url={https://sttp.site}, -} - -@inproceedings{z3, -author = {De Moura, Leonardo and Bj\o{}rner, Nikolaj}, -title = {{Z3}: An Efficient {SMT} Solver}, -year = {2008}, -isbn = {3540787992}, -publisher = {Springer-Verlag}, -address = {Berlin, Heidelberg}, -abstract = {Satisfiability Modulo Theories (SMT) problem is a decision problem for logical first order formulas with respect to combinations of background theories such as: arithmetic, bit-vectors, arrays, and uninterpreted functions.
Z3 is a new and efficient SMT Solver freely available from Microsoft Research. It is used in various software verification and analysis applications.}, -booktitle = {Proceedings of the Theory and Practice of Software, 14th International Conference on Tools and Algorithms for the Construction and Analysis of Systems}, -pages = {337--340}, -numpages = {4}, -location = {Budapest, Hungary}, -series = {TACAS'08/ETAPS'08}, -url={https://link.springer.com/chapter/10.1007/978-3-540-78800-3_24}, -} - -@book{zeller2009-why-programs-fail, - author = {Andreas Zeller}, - title = {Why Programs Fail - {A} Guide to Systematic Debugging, 2nd Edition}, - publisher = {Morgan Kaufmann}, - year = {2009}, - url = {http://www.whyprogramsfail.com/}, - isbn = {978-0-12-374515-6}, - timestamp = {Mon, 06 Feb 2017 15:25:22 +0100}, - biburl = {https://dblp.org/rec/books/daglib/0039904.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@book{spinellis2016-effective-debugging, - author = {Diomidis Spinellis}, - title = {Effective Debugging: 66 Specific Ways to Debug Software and Systems}, - publisher = {Addison-Wesley Professional}, - year = {2016}, - url = {https://www.spinellis.gr/debugging/} -} - -@book{agans2006-debugging, -author = {Agans, David J.}, -title = {Debugging: The 9 Indispensable Rules for Finding Even the Most Elusive Software and Hardware Problems}, -year = {2002}, -isbn = {0814471684}, -publisher = {American Management Assoc., Inc.}, -address = {USA}, -abstract = {From the Publisher: When the pressure is on to root out an elusive software or hardware glitch, what's needed is a cool head courtesy of a set of rules guaranteed to work on any system, in any circumstance. Written in a frank but engaging style, Debugging provides simple, foolproof principles guaranteed to help find any bug quickly. This book makes those shelves of application-specific debugging books (on C++, Perl, Java, etc.) obsolete. It changes the way readers think about debugging, making those pesky problems suddenly much easier to find and fix. Illustrating the rules with real-life bug-detection war stories, the book shows readers how to: Understand the system: how perceiving the "roadmap" can hasten your journey Quit thinking and look: when hands-on investigation can't be avoided Isolate critical factors: why changing one element at a time can be an essential tool Keep an audit trail: how keeping a record of the debugging process can win the day Author Biography: David J. Agans (Milford, NH) is a recognized expert called in to help with tough debugging problems. He currently runs PointSource, a computer systems consultancy. He has worked with industrial control and monitoring systems, integrated circuit design, handheld PCs, videoconferencing, and countless other systems.}, -url = {https://dl.acm.org/doi/book/10.5555/555103} -} - -@article{Abreu2009, -author = {Abreu, Rui and Zoeteweij, Peter and Golsteijn, Rob and van Gemund, Arjan J. C.}, -title = {A Practical Evaluation of Spectrum-Based Fault Localization}, -year = {2009}, -issue_date = {November, 2009}, -publisher = {Elsevier Science Inc.}, -address = {USA}, -volume = {82}, -number = {11}, -issn = {0164-1212}, -url = {https://doi.org/10.1016/j.jss.2009.06.035}, -doi = {10.1016/j.jss.2009.06.035}, -abstract = {Spectrum-based fault localization (SFL) shortens the test-diagnose-repair cycle by reducing the debugging effort. As a light-weight automated diagnosis technique it can easily be integrated with existing testing schemes.
Since SFL is based on discovering statistical coincidences between system failures and the activity of the different parts of a system, its diagnostic accuracy is inherently limited. Using a common benchmark consisting of the Siemens set and the space program, we investigate this diagnostic accuracy as a function of several parameters (such as quality and quantity of the program spectra collected during the execution of the system), some of which directly relate to test design. Our results indicate that the superior performance of a particular similarity coefficient, used to analyze the program spectra, is largely independent of test design. Furthermore, near-optimal diagnostic accuracy (exonerating over 80% of the blocks of code on average) is already obtained for low-quality error observations and limited numbers of test cases. In addition to establishing these results in the controlled environment of our benchmark set, we show that SFL can effectively be applied in the context of embedded software development in an industrial environment.}, -journal = {J. Syst. Softw.}, -month = nov, -pages = {1780--1792}, -numpages = {13}, -keywords = {Real-time and embedded systems, Software fault diagnosis, Program spectra, Test data analysis, Consumer electronics} -} - -@inproceedings{Jones2002, -author = {Jones, James A. and Harrold, Mary Jean and Stasko, John}, -title = {Visualization of Test Information to Assist Fault Localization}, -year = {2002}, -isbn = {158113472X}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/581339.581397}, -doi = {10.1145/581339.581397}, -abstract = {One of the most expensive and time-consuming components of the debugging process is locating the errors or faults. To locate faults, developers must identify statements involved in failures and select suspicious statements that might contain faults. This paper presents a new technique that uses visualization to assist with these tasks. The technique uses color to visually map the participation of each program statement in the outcome of the execution of the program with a test suite, consisting of both passed and failed test cases. Based on this visual mapping, a user can inspect the statements in the program, identify statements involved in failures, and locate potentially faulty statements. The paper also describes a prototype tool that implements our technique along with a set of empirical studies that use the tool for evaluation of the technique. The empirical studies show that, for the subject we studied, the technique can be effective in helping a user locate faults in a program.}, -booktitle = {Proceedings of the 24th International Conference on Software Engineering}, -pages = {467--477}, -numpages = {11}, -location = {Orlando, Florida}, -series = {ICSE '02} -} - -@article{daSilvaMeyer2004, - title = {Comparison of similarity coefficients used for cluster analysis with dominant markers in maize ({Zea mays L})}, - journal = {Genetics and Molecular Biology}, - author={da Silva Meyer, Andr\'eia and Garcia, Antonio Augusto Franco and de Souza, Anete Pereira and de Souza Jr., Cl\'audio Lopes}, - issn = {1415-4757}, - url = {https://doi.org/10.1590/S1415-47572004000100014}, - doi = {10.1590/S1415-47572004000100014}, - volume = {27}, - year = {2004}, - pages = {83--91}, - publisher = {SciELO} -} - -@article{Wong2016, -author = {Wong, W.
Eric and Gao, Ruizhi and Li, Yihao and Abreu, Rui and Wotawa, Franz}, -title = {A Survey on Software Fault Localization}, -year = {2016}, -issue_date = {August 2016}, -publisher = {IEEE Press}, -volume = {42}, -number = {8}, -issn = {0098-5589}, -url = {https://doi.org/10.1109/TSE.2016.2521368}, -doi = {10.1109/TSE.2016.2521368}, -abstract = {Software fault localization, the act of identifying the locations of faults in a program, is widely recognized to be one of the most tedious, time consuming, and expensive---yet equally critical---activities in program debugging. Due to the increasing scale and complexity of software today, manually locating faults when failures occur is rapidly becoming infeasible, and consequently, there is a strong demand for techniques that can guide software developers to the locations of faults in a program with minimal human intervention. This demand in turn has fueled the proposal and development of a broad spectrum of fault localization techniques, each of which aims to streamline the fault localization process and make it more effective by attacking the problem in a unique way. In this article, we catalog and provide a comprehensive overview of such techniques and discuss key issues and concerns that are pertinent to software fault localization as a whole.}, -journal = {IEEE Trans. Softw. Eng.}, -month = aug, -pages = {707--740}, -numpages = {34} -} - -@inproceedings{Parnin2011, -author = {Parnin, Chris and Orso, Alessandro}, -title = {Are Automated Debugging Techniques Actually Helping Programmers?}, -year = {2011}, -isbn = {9781450305624}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2001420.2001445}, -doi = {10.1145/2001420.2001445}, -abstract = {Debugging is notoriously difficult and extremely time consuming. Researchers have therefore invested a considerable amount of effort in developing automated techniques and tools for supporting various debugging tasks. Although potentially useful, most of these techniques have yet to demonstrate their practical effectiveness. One common limitation of existing approaches, for instance, is their reliance on a set of strong assumptions on how developers behave when debugging (e.g., the fact that examining a faulty statement in isolation is enough for a developer to understand and fix the corresponding bug). In more general terms, most existing techniques just focus on selecting subsets of potentially faulty statements and ranking them according to some criterion. By doing so, they ignore the fact that understanding the root cause of a failure typically involves complex activities, such as navigating program dependencies and rerunning the program with different inputs. The overall goal of this research is to investigate how developers use and benefit from automated debugging tools through a set of human studies. As a first step in this direction, we perform a preliminary study on a set of developers by providing them with an automated debugging tool and two tasks to be performed with and without the tool. Our results provide initial evidence that several assumptions made by automated debugging techniques do not hold in practice. 
Through an analysis of the results, we also provide insights on potential directions for future work in the area of automated debugging.}, -booktitle = {Proceedings of the 2011 International Symposium on Software Testing and Analysis}, -pages = {199--209}, -numpages = {11}, -keywords = {statistical debugging, user studies}, -location = {Toronto, Ontario, Canada}, -series = {ISSTA '11} -} - -@article{Ochiai1957, - title={Zoogeographical Studies on the Soleoid Fishes Found in Japan and its Neighbouring Regions-III}, - author={Akira Ochiai}, - journal={Nippon Suisan Gakkaishi}, - year={1957}, - url={https://www.jstage.jst.go.jp/article/suisan1932/22/9/22_9_522/_article/-char/ja/}, - volume={22}, - pages={522--525} -} - -@inproceedings{Kirschner2020, -author = {Kirschner, Lukas and Soremekun, Ezekiel and Zeller, Andreas}, -title = {Debugging Inputs}, -year = {2020}, -isbn = {9781450371223}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://publications.cispa.saarland/3060/}, -XXXdoi = {10.1145/3377812.3390797}, -abstract = {Program failures are often caused by invalid inputs, for instance due to input corruption. To obtain the passing input, one needs to debug the data. In this paper we present a generic technique called ddmax that (1) identifies which parts of the input data prevent processing, and (2) recovers as much of the (valuable) input data as possible. To the best of our knowledge, ddmax is the first approach that fixes faults in the input data without requiring program analysis. In our evaluation, ddmax repaired about 69% of input files and recovered about 78% of data within one minute per input.}, -booktitle = {Proceedings of the ACM/IEEE 42nd International Conference on Software Engineering: Companion Proceedings}, -pages = {300--301}, -numpages = {2}, -location = {Seoul, South Korea}, -series = {ICSE '20} -} - -@inproceedings{Ness1997, -author = {Ness, Brian and Ngo, Viet}, -title = {Regression Containment through Source Change Isolation}, -year = {1997}, -isbn = {0818681055}, -publisher = {IEEE Computer Society}, -url={https://www.computer.org/csdl/proceedings-article/compsac/1997/81050616/12OmNANBZnS}, -address = {USA}, -abstract = {Effective regression containment is an important factor in the design of development and testing processes for large software projects, especially when many developers are doing concurrent work on a common set of sources. Source change isolation provides an inexpensive, mechanical alternative to analytical methods for identifying the cause of software regressions. It also provides the advantage of enabling regressions to be eliminated by reversing the effect of source changes that introduced errant behavior, without the need to write new code, and without halting other development work on the same software. Deliverability is also improved.}, -booktitle = {Proceedings of the 21st International Computer Software and Applications Conference}, -pages = {616}, -numpages = {1}, -series = {COMPSAC '97} -} - -@inproceedings{zheng2003, -author = {Zheng, Alice X. and Jordan, Michael I. and Liblit, Ben and Aiken, Alex}, -title = {Statistical Debugging of Sampled Programs}, -year = {2003}, -publisher = {MIT Press}, -address = {Cambridge, MA, USA}, -abstract = {We present a novel strategy for automatically debugging programs given sampled data from thousands of actual user runs. Our goal is to pinpoint those features that are most correlated with crashes. 
This is accomplished by maximizing an appropriately defined utility function. It has analogies with intuitive debugging heuristics, and, as we demonstrate, is able to deal with various types of bugs that occur in real programs.}, -booktitle = {Proceedings of the 16th International Conference on Neural Information Processing Systems}, -pages = {603--610}, -numpages = {8}, -location = {Whistler, British Columbia, Canada}, -series = {NIPS'03} -} - -@inproceedings{Liblit2003, -author = {Liblit, Ben and Aiken, Alex and Zheng, Alice X. and Jordan, Michael I.}, -title = {Bug Isolation via Remote Program Sampling}, -year = {2003}, -isbn = {1581136625}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/781131.781148}, -doi = {10.1145/781131.781148}, -abstract = {We propose a low-overhead sampling infrastructure for gathering information from the executions experienced by a program's user community. Several example applications illustrate ways to use sampled instrumentation to isolate bugs. Assertion-dense code can be transformed to share the cost of assertions among many users. Lacking assertions, broad guesses can be made about predicates that predict program errors and a process of elimination used to whittle these down to the true bug. Finally, even for non-deterministic bugs such as memory corruption, statistical modeling based on logistic regression allows us to identify program behaviors that are strongly correlated with failure and are therefore likely places to look for the error.}, -booktitle = {Proceedings of the ACM SIGPLAN 2003 Conference on Programming Language Design and Implementation}, -pages = {141--154}, -numpages = {14}, -keywords = {statistical debugging, bug isolation, random sampling, logistic regression, assertions, feature selection}, -location = {San Diego, California, USA}, -series = {PLDI '03} -} - -@inproceedings{10.5555/318773.318946, -author = {Zeller, Andreas}, -title = {Yesterday, My Program Worked. Today, It Does Not. Why?}, -year = {1999}, -isbn = {3540665382}, -publisher = {Springer-Verlag}, -address = {Berlin, Heidelberg}, -abstract = {Imagine some program and a number of changes. If none of these changes is applied (“yesterday”), the program works. If all changes are applied (“today”), the program does not work. Which change is responsible for the failure? We present an efficient algorithm that determines the minimal set of failure-inducing changes. Our delta debugging prototype tracked down a single failure-inducing change from 178,000 changed GDB lines within a few hours.}, -booktitle = {Proceedings of the 7th European Software Engineering Conference Held Jointly with the 7th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, -pages = {253--267}, -numpages = {15}, -location = {Toulouse, France}, -series = {ESEC/FSE-7} -} - -@article{Zeller1999, -author = {Zeller, Andreas}, -title = {Yesterday, My Program Worked. Today, It Does Not. Why?}, -year = {1999}, -issue_date = {Nov. 1999}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -volume = {24}, -number = {6}, -issn = {0163-5948}, -url = {https://doi.org/10.1145/318774.318946}, -doi = {10.1145/318774.318946}, -abstract = {Imagine some program and a number of changes. If none of these changes is applied (“yesterday”), the program works. If all changes are applied (“today”), the program does not work. Which change is responsible for the failure?
We present an efficient algorithm that determines the minimal set of failure-inducing changes. Our delta debugging prototype tracked down a single failure-inducing change from 178,000 changed GDB lines within a few hours.}, -journal = {SIGSOFT Softw. Eng. Notes}, -month = oct, -pages = {253--267}, -numpages = {15} -} - -@inproceedings{Chen2014, - author={Z. Chen and L. Chen and Y. Zhou and Z. Xu and W. C. Chu and B. Xu}, - booktitle={2014 IEEE 38th Annual Computer Software and Applications Conference}, - title={Dynamic Slicing of Python Programs}, - year={2014}, - volume={}, - number={}, - pages={219-228}, - doi={10.1109/COMPSAC.2014.30} -} - -@article{Weiser1982, -author = {Weiser, Mark}, -title = {Programmers Use Slices When Debugging}, -year = {1982}, -issue_date = {July 1982}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -volume = {25}, -number = {7}, -issn = {0001-0782}, -url = {https://doi.org/10.1145/358557.358577}, -doi = {10.1145/358557.358577}, -abstract = {Computer programmers break apart large programs into smaller coherent pieces. Each of these pieces: functions, subroutines, modules, or abstract datatypes, is usually a contiguous piece of program text. The experiment reported here shows that programmers also routinely break programs into one kind of coherent piece which is not contiguous. When debugging unfamiliar programs programmers use program pieces called slices which are sets of statements related by their flow of data. The statements in a slice are not necessarily textually contiguous, but may be scattered through a program.}, -journal = {Commun. ACM}, -month = jul, -pages = {446–452}, -numpages = {7}, -keywords = {slice, program decomposition} -} - -@inproceedings{10Weiser1981, -author = {Weiser, Mark}, -title = {Program Slicing}, -year = {1981}, -isbn = {0897911466}, -publisher = {IEEE Press}, -abstract = {Program slicing is a method used by experienced computer programmers for abstracting from programs. Starting from a subset of a program's behavior, slicing reduces that program to a minimal form which still produces that behavior. The reduced program, called a “slice”, is an independent program guaranteed to faithfully represent the original program within the domain of the specified subset of behavior. Finding a slice is in general unsolvable. A dataflow algorithm is presented for approximating slices when the behavior subset is specified as the values of a set of variables at a statement. Experimental evidence is presented that these slices are used by programmers during debugging. Experience with two automatic slicing tools is summarized. New measures of program complexity are suggested based on the organization of a program's slices.}, -booktitle = {Proceedings of the 5th International Conference on Software Engineering}, -pages = {439–449}, -numpages = {11}, -keywords = {Human factors, Data flow analysis, Program metrics, Program maintenance, Debugging, Software tools}, -location = {San Diego, California, USA}, -series = {ICSE '81} -} - -@article{Tip1995, - title={A survey of program slicing techniques}, - author={Tip, Frank}, - journal={Journal of programming languages}, - volume={3}, - number={3}, - pages={121--189}, - url={https://www.franktip.org/pubs/jpl1995.pdf}, - year={1995} -} - -@article{Korel1988, -author = {Korel, B. 
and Laski, J.}, -title = {Dynamic Program Slicing}, -year = {1988}, -issue_date = {October 26, 1988}, -publisher = {Elsevier North-Holland, Inc.}, -address = {USA}, -volume = {29}, -number = {3}, -issn = {0020-0190}, -url = {https://doi.org/10.1016/0020-0190(88)90054-3}, -doi = {10.1016/0020-0190(88)90054-3}, -journal = {Inf. Process. Lett.}, -month = oct, -pages = {155–163}, -numpages = {9} -} - -@inproceedings{Agrawal1990, -author = {Agrawal, Hiralal and Horgan, Joseph R.}, -title = {Dynamic Program Slicing}, -year = {1990}, -isbn = {0897913647}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/93542.93576}, -doi = {10.1145/93542.93576}, -abstract = {Program slices are useful in debugging, testing, maintenance, and understanding of programs. The conventional notion of a program slice, the static slice, is the set of all statements that might affect the value of a given variable occurrence. In this paper, we investigate the concept of the dynamic slice consisting of all statements that actually affect the value of a variable occurrence for a given program input. The sensitivity of dynamic slicing to particular program inputs makes it more useful in program debugging and testing than static slicing. Several approaches for computing dynamic slices are examined. The notion of a Dynamic Dependence Graph and its use in computing dynamic slices is discussed. The Dynamic Dependence Graph may be unbounded in length; therefore, we introduce the economical concept of a Reduced Dynamic Dependence Graph, which is proportional in size to the number of dynamic slices arising during the program execution.}, -booktitle = {Proceedings of the ACM SIGPLAN 1990 Conference on Programming Language Design and Implementation}, -pages = {246–256}, -numpages = {11}, -location = {White Plains, New York, USA}, -series = {PLDI '90} -} - -@inproceedings{Ko2004, -author = {Ko, Andrew J. and Myers, Brad A.}, -title = {Designing the Whyline: A Debugging Interface for Asking Questions about Program Behavior}, -year = {2004}, -isbn = {1581137028}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/985692.985712}, -doi = {10.1145/985692.985712}, -abstract = {Debugging is still among the most common and costly of programming activities. One reason is that current debugging tools do not directly support the inquisitive nature of the activity. Interrogative Debugging is a new debugging paradigm in which programmers can ask why did and even why didn't questions directly about their program's runtime failures. The Whyline is a prototype Interrogative Debugging interface for the Alice programming environment that visualizes answers in terms of runtime events directly relevant to a programmer's question. 
Comparisons of identical debugging scenarios from user tests with and without the Whyline showed that the Whyline reduced debugging time by nearly a factor of 8, and helped programmers complete 40% more tasks.}, -booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems}, -pages = {151–158}, -numpages = {8}, -keywords = {debugging, program slicing, Alice}, -location = {Vienna, Austria}, -series = {CHI '04} -} - -@article{Soremekun2021, -title = {Locating Faults with Program Slicing: An Empirical Analysis}, -author = {Ezekiel Soremekun and Lukas Kirschner and Marcel B{\"o}hme and Andreas Zeller}, -journal = {Empirical Software Engineering}, -year = {2021}, -url = {https://figshare.com/articles/conference_contribution/Locating_Faults_with_Program_Slicing_-_An_Empirical_Analysis_-_Replication_Package/13369400/1} -} - -@ARTICLE{LeGoues2012, - author={C. {Le Goues} and T. {Nguyen} and S. {Forrest} and W. {Weimer}}, - journal={IEEE Transactions on Software Engineering}, - title={GenProg: A Generic Method for Automatic Software Repair}, - year={2012}, - volume={38}, - number={1}, - pages={54--72}, - doi={10.1109/TSE.2011.104}, - url={https://ieeexplore.ieee.org/document/6035728} -} - -@article{Pei2014, - author={Y. {Pei} and C. A. {Furia} and M. {Nordio} and Y. {Wei} and B. {Meyer} and A. {Zeller}}, - journal={IEEE Transactions on Software Engineering}, - title={Automated Fixing of Programs with Contracts}, - year={2014}, - volume={40}, - number={5}, - pages={427--449}, - doi={10.1109/TSE.2014.2312918}, - url={https://ieeexplore.ieee.org/document/6776507} -} - -@inproceedings{Nguyen2013, -author = {Nguyen, Hoang Duong Thien and Qi, Dawei and Roychoudhury, Abhik and Chandra, Satish}, -title = {SemFix: Program Repair via Semantic Analysis}, -year = {2013}, -isbn = {9781467330763}, -publisher = {IEEE Press}, -abstract = {Debugging consumes significant time and effort in any major software development project. Moreover, even after the root cause of a bug is identified, fixing the bug is non-trivial. Given this situation, automated program repair methods are of value. In this paper, we present an automated repair method based on symbolic execution, constraint solving and program synthesis. In our approach, the requirement on the repaired code to pass a given set of tests is formulated as a constraint. Such a constraint is then solved by iterating over a layered space of repair expressions, layered by the complexity of the repair code. We compare our method with recently proposed genetic programming based repair on SIR programs with seeded bugs, as well as fragments of GNU Coreutils with real bugs. On these subjects, our approach reports a higher success-rate than genetic programming based repair, and produces a repair faster.}, -booktitle = {Proceedings of the 2013 International Conference on Software Engineering}, -pages = {772--781}, -numpages = {10}, -location = {San Francisco, CA, USA}, -url = {https://dl.acm.org/doi/10.5555/2486788.2486890}, -series = {ICSE '13} -} - -@inproceedings{Kalhauge2019, -author = {Kalhauge, Christian Gram and Palsberg, Jens}, -title = {Binary Reduction of Dependency Graphs}, -year = {2019}, -isbn = {9781450355728}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/3338906.3338956}, -doi = {10.1145/3338906.3338956}, -abstract = {Delta debugging is a technique for reducing a failure-inducing input to a small input that reveals the cause of the failure. 
This has been successful for a wide variety of inputs including C programs, XML data, and thread schedules. However, for input that has many internal dependencies, delta debugging scales poorly. Such input includes C#, Java, and Java bytecode and they have presented a major challenge for input reduction until now. In this paper, we show that the core challenge is a reduction problem for dependency graphs, and we present a general strategy for reducing such graphs. We combine this with a novel algorithm for reduction called Binary Reduction in a tool called J-Reduce for Java bytecode. Our experiments show that our tool is 12x faster and achieves more reduction than delta debugging on average. This enabled us to create and submit short bug reports for three Java bytecode decompilers.}, -booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, -pages = {556--566}, -numpages = {11}, -keywords = {dependencies, Debugging, reduction}, -location = {Tallinn, Estonia}, -series = {ESEC/FSE 2019} -} - -@inproceedings{Gopinath2020, -author = {Gopinath, Rahul and Mathis, Bj\"{o}rn and Zeller, Andreas}, -title = {Mining Input Grammars from Dynamic Control Flow}, -year = {2020}, -isbn = {9781450370431}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/3368089.3409679}, -abstract = {One of the key properties of a program is its input specification. Having a formal input specification can be critical in fields such as vulnerability analysis, reverse engineering, software testing, clone detection, or refactoring. Unfortunately, accurate input specifications for typical programs are often unavailable or out of date. In this paper, we present a general algorithm that takes a program and a small set of sample inputs and automatically infers a readable context-free grammar capturing the input language of the program. We infer the syntactic input structure only by observing access of input characters at different locations of the input parser. This works on all stack based recursive descent input parsers, including parser combinators, and works entirely without program specific heuristics. Our Mimid prototype produced accurate and readable grammars for a variety of evaluation subjects, including complex languages such as JSON, TinyC, and JavaScript.}, -booktitle = {Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, -pages = {172–183}, -numpages = {12} -} - -@inproceedings{Bettenburg2008, -author = {Bettenburg, Nicolas and Just, Sascha and Schr\"{o}ter, Adrian and Weiss, Cathrin and Premraj, Rahul and Zimmermann, Thomas}, -title = {What Makes a Good Bug Report?}, -year = {2008}, -isbn = {9781595939951}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1453101.1453146}, -doi = {10.1145/1453101.1453146}, -abstract = {In software development, bug reports provide crucial information to developers. However, these reports widely differ in their quality. We conducted a survey among developers and users of APACHE, ECLIPSE, and MOZILLA to find out what makes a good bug report.The analysis of the 466 responses revealed an information mismatch between what developers need and what users supply. 
Most developers consider steps to reproduce, stack traces, and test cases as helpful, which are at the same time most difficult to provide for users. Such insight is helpful to design new bug tracking tools that guide users at collecting and providing more helpful information.Our CUEZILLA prototype is such a tool and measures the quality of new bug reports; it also recommends which elements should be added to improve the quality. We trained CUEZILLA on a sample of 289 bug reports, rated by developers as part of the survey. In our experiments, CUEZILLA was able to predict the quality of 31--48% of bug reports accurately.}, -booktitle = {Proceedings of the 16th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, -pages = {308–318}, -numpages = {11}, -location = {Atlanta, Georgia}, -series = {SIGSOFT '08/FSE-16} -} - -@inproceedings{Bertram2010, -author = {Bertram, Dane and Voida, Amy and Greenberg, Saul and Walker, Robert}, -title = {Communication, Collaboration, and Bugs: The Social Nature of Issue Tracking in Small, Collocated Teams}, -year = {2010}, -isbn = {9781605587950}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1718918.1718972}, -doi = {10.1145/1718918.1718972}, -abstract = {Issue tracking systems help organizations manage issue reporting, assignment, tracking, resolution, and archiving. Traditionally, it is the Software Engineering community that researches issue tracking systems, where software defects are reported and tracked as 'bug reports' within an archival database. Yet, as issue tracking is fundamentally a social process, it is important to understand the design and use of issue tracking systems from that perspective. Consequently, we conducted a qualitative study of issue tracking systems as used by small, collocated software development teams. We found that an issue tracker is not just a database for tracking bugs, features, and inquiries, but also a focal point for communication and coordination for many stakeholders within and beyond the software team. Customers, project managers, quality assurance personnel, and programmers all contribute to the shared knowledge and persistent communication that exists within the issue tracking system. These results were all the more striking because in spite of teams being collocated--which afforded frequent, face-to-face communication--the issue tracker was still used as a fundamental communication channel. We articulate various real-world practices surrounding issue trackers and offer design considerations for future systems.}, -booktitle = {Proceedings of the 2010 ACM Conference on Computer Supported Cooperative Work}, -pages = {291–300}, -numpages = {10}, -keywords = {shared knowledge, software engineering, issue tracking}, -location = {Savannah, Georgia, USA}, -series = {CSCW '10} -} - -@inproceedings{Bissyande2013, - author={T. F. Bissyandé and D. Lo and L. Jiang and L. Réveillère and J. Klein and Y. L. Traon}, - booktitle={2013 IEEE 24th International Symposium on Software Reliability Engineering (ISSRE)}, - title={Got issues? Who cares about it? A large scale investigation of issue trackers from GitHub}, - year={2013}, - volume={}, - number={}, - pages={188-197}, - doi={10.1109/ISSRE.2013.6698918} -} - -@inproceedings{Herzig2013, - author={K. Herzig and S. Just and A. 
Zeller}, - booktitle={2013 35th International Conference on Software Engineering (ICSE)}, - title={It's not a bug, it's a feature: How misclassification impacts bug prediction}, - year={2013}, - volume={}, - number={}, - pages={392-401}, - doi={10.1109/ICSE.2013.6606585} -} - -@inproceedings{Anvik2006, -author = {Anvik, John and Hiew, Lyndon and Murphy, Gail C.}, -title = {Who Should Fix This Bug?}, -year = {2006}, -isbn = {1595933751}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1134285.1134336}, -doi = {10.1145/1134285.1134336}, -abstract = {Open source development projects typically support an open bug repository to which both developers and users can report bugs. The reports that appear in this repository must be triaged to determine if the report is one which requires attention and if it is, which developer will be assigned the responsibility of resolving the report. Large open source developments are burdened by the rate at which new bug reports appear in the bug repository. In this paper, we present a semi-automated approach intended to ease one part of this process, the assignment of reports to a developer. Our approach applies a machine learning algorithm to the open bug repository to learn the kinds of reports each developer resolves. When a new report arrives, the classifier produced by the machine learning technique suggests a small number of developers suitable to resolve the report. With this approach, we have reached precision levels of 57% and 64% on the Eclipse and Firefox development projects respectively. We have also applied our approach to the gcc open source development with less positive results. We describe the conditions under which the approach is applicable and also report on the lessons we learned about applying machine learning to repositories used in open source development.}, -booktitle = {Proceedings of the 28th International Conference on Software Engineering}, -pages = {361–370}, -numpages = {10}, -keywords = {bug report assignment, problem tracking, bug triage, issue tracking, machine learning}, -location = {Shanghai, China}, -series = {ICSE '06} -} - -@article{Kim2013, - author={D. Kim and Y. Tao and S. Kim and A. Zeller}, - journal={IEEE Transactions on Software Engineering}, - title={Where Should We Fix This Bug? A Two-Phase Recommendation Model}, - year={2013}, - volume={39}, - number={11}, - pages={1597-1610}, - doi={10.1109/TSE.2013.24} -} - -@inproceedings{Wang2008, -author = {Wang, Xiaoyin and Zhang, Lu and Xie, Tao and Anvik, John and Sun, Jiasu}, -title = {An Approach to Detecting Duplicate Bug Reports Using Natural Language and Execution Information}, -year = {2008}, -isbn = {9781605580791}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1368088.1368151}, -doi = {10.1145/1368088.1368151}, -abstract = {An open source project typically maintains an open bug repository so that bug reports from all over the world can be gathered. When a new bug report is submitted to the repository, a person, called a triager, examines whether it is a duplicate of an existing bug report. If it is, the triager marks it as DUPLICATE and the bug report is removed from consideration for further work. In the literature, there are approaches exploiting only natural language information to detect duplicate bug reports. In this paper we present a new approach that further involves execution information. 
In our approach, when a new bug report arrives, its natural language information and execution information are compared with those of the existing bug reports. Then, a small number of existing bug reports are suggested to the triager as the most similar bug reports to the new bug report. Finally, the triager examines the suggested bug reports to determine whether the new bug report duplicates an existing bug report. We calibrated our approach on a subset of the Eclipse bug repository and evaluated our approach on a subset of the Firefox bug repository. The experimental results show that our approach can detect 67%-93% of duplicate bug reports in the Firefox bug repository, compared to 43%-72% using natural language information alone.}, -booktitle = {Proceedings of the 30th International Conference on Software Engineering}, -pages = {461–470}, -numpages = {10}, -keywords = {execution information, duplicate bug report, information retrieval}, -location = {Leipzig, Germany}, -series = {ICSE '08} -} - -@inproceedings{Gopinath2020abstracting, -author = {Gopinath, Rahul and Kampmann, Alexander and Havrikov, Nikolas and Soremekun, Ezekiel O. and Zeller, Andreas}, -title = {Abstracting Failure-Inducing Inputs}, -year = {2020}, -isbn = {9781450380089}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/3395363.3397349}, -doi = {10.1145/3395363.3397349}, -abstract = {A program fails. Under which circumstances does the failure occur? Starting with a single failure-inducing input ("The input ((4)) fails") and an input grammar, the DDSET algorithm uses systematic tests to automatically generalize the input to an abstract failure-inducing input that contains both (concrete) terminal symbols and (abstract) nonterminal symbols from the grammar—for instance, "(())", which represents any expression in double parentheses. Such an abstract failure-inducing input can be used (1) as a debugging diagnostic, characterizing the circumstances under which a failure occurs ("The error occurs whenever an expression is enclosed in double parentheses"); (2) as a producer of additional failure-inducing tests to help design and validate fixes and repair candidates ("The inputs ((1)), ((3 * 4)), and many more also fail"). In its evaluation on real-world bugs in JavaScript, Clojure, Lua, and UNIX command line utilities, DDSET’s abstract failure-inducing inputs provided to-the-point diagnostics, and precise producers for further failure-inducing inputs.}, -booktitle = {Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis}, -pages = {237–248}, -numpages = {12}, -keywords = {error diagnosis, debugging, grammars, failure-inducing inputs}, -location = {Virtual Event, USA}, -series = {ISSTA 2020} -} - -@inproceedings{Kampmann2020, -author = {Kampmann, Alexander and Havrikov, Nikolas and Soremekun, Ezekiel O. and Zeller, Andreas}, -title = {When Does My Program Do This? Learning Circumstances of Software Behavior}, -year = {2020}, -isbn = {9781450370431}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/3368089.3409687}, -abstract = {A program fails. Under which circumstances does the failure occur? Our Alhazen approach starts with a run that exhibits a particular behavior and automatically determines input features associated with the behavior in question: (1) We use a grammar to parse the input into individual elements. 
(2) We use a decision tree learner to observe and learn which input elements are associated with the behavior in question. (3) We use the grammar to generate additional inputs to further strengthen or refute hypotheses as learned associations. (4) By repeating steps 2 and 3, we obtain a theory that explains and predicts the given behavior. In our evaluation using inputs for find, grep, NetHack, and a JavaScript transpiler, the theories produced by Alhazen predict and produce failures with high accuracy and allow developers to focus on a small set of input features: “grep fails whenever the --fixed-strings option is used in conjunction with an empty search string.”}, -booktitle = {Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, -pages = {1228–1239}, -numpages = {12} -} - -@inproceedings{Gopinath2021, - title = {Input Algebras}, - author = {Gopinath, Rahul and Nemati, Hamed and Zeller, Andreas}, - booktitle = {International Conference on Software Engineering (ICSE 2021)}, - year = {2021}, - url = {https://publications.cispa.saarland/3208/}, - note = {To Appear} -} - -@inproceedings{King2005, -author = {Samuel T. King and George W. Dunlap and Peter M. Chen}, -title = {Debugging Operating Systems with Time-Traveling Virtual Machines}, -booktitle = {2005 {USENIX} Annual Technical Conference ({USENIX} {ATC} 05)}, -year = {2005}, -address = {Anaheim, CA}, -url = {https://www.usenix.org/conference/2005-usenix-annual-technical-conference/debugging-operating-systems-time-traveling}, -publisher = {{USENIX} Association}, -month = apr, -} - -@inproceedings{Glerum2009, -author = {Glerum, Kirk and Kinshumann, Kinshuman and Greenberg, Steve and Aul, Gabriel and Orgovan, Vince and Nichols, Greg and Grant, David and Loihle, Gretchen and Hunt, Galen}, -title = {Debugging in the (Very) Large: Ten Years of Implementation and Experience}, -year = {2009}, -isbn = {9781605587523}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1629575.1629586}, -doi = {10.1145/1629575.1629586}, -abstract = {Windows Error Reporting (WER) is a distributed system that automates the processing of error reports coming from an installed base of a billion machines. WER has collected billions of error reports in ten years of operation. It collects error data automatically and classifies errors into buckets, which are used to prioritize developer effort and report fixes to users. WER uses a progressive approach to data collection, which minimizes overhead for most reports yet allows developers to collect detailed information when needed. WER takes advantage of its scale to use error statistics as a tool in debugging; this allows developers to isolate bugs that could not be found at smaller scale. 
WER has been designed for large scale: one pair of database servers can record all the errors that occur on all Windows computers worldwide.}, -booktitle = {Proceedings of the ACM SIGOPS 22nd Symposium on Operating Systems Principles}, -pages = {103–116}, -numpages = {14}, -keywords = {classifying, statistics-based debugging., error reports, blue screen of death, minidump, bucketing, labeling}, -location = {Big Sky, Montana, USA}, -series = {SOSP '09} -} diff --git a/fuzzingbook.bib b/fuzzingbook.bib new file mode 120000 index 000000000..eeb1699ea --- /dev/null +++ b/fuzzingbook.bib @@ -0,0 +1 @@ +notebooks/shared/fuzzingbook.bib \ No newline at end of file diff --git a/notebooks/ClassDiagram.ipynb b/notebooks/ClassDiagram.ipynb deleted file mode 100644 index 58136669e..000000000 --- a/notebooks/ClassDiagram.ipynb +++ /dev/null @@ -1,1199 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "# Class Diagrams\n", - "\n", - "This is a simple viewer for class diagrams. Customized towards the book." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "**Prerequisites**\n", - "\n", - "* _Refer to earlier chapters as notebooks here, as here:_ [Earlier Chapter](Debugger.ipynb)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - }, - "slideshow": { - "slide_type": "skip" - } - }, - "outputs": [], - "source": [ - "import bookutils" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synopsis\n", - "\n", - "\n", - "To [use the code provided in this chapter](Importing.ipynb), write\n", - "\n", - "```python\n", - ">>> from fuzzingbook.ClassDiagram import \n", - "```\n", - "\n", - "and then make use of the following features.\n", - "\n", - "\n", - "The `display_class_hierarchy()` function shows the class hierarchy for the given class. Methods with docstrings (intended to be used by the public) are shown in bold.\n", - "\n", - "```python\n", - ">>> display_class_hierarchy(GrammarFuzzer)\n", - "```\n", - "\n", - "![](PICS/ClassDiagram-synopsis-1.svg)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": true, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Getting a Class Hierarchy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import inspect" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using `mro()`, we can access the class hierarchy. We make sure to avoid duplicates created by `class X(X)`." 
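To make the duplicate problem concrete, here is a minimal sketch (a toy example for illustration, not part of the notebook): redefining a class in terms of itself, as notebook cells do to extend classes incrementally, leaves both definitions on the MRO under the same name.

```python
class X:
    pass

class X(X):  # "extends" the previous X, as incremental notebook cells do
    pass

# Both definitions remain on the MRO, distinguishable only by their (equal) names:
assert [c.__name__ for c in X.mro()] == ['X', 'X', 'object']
```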
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Callable, Dict, Type, Set, List, Union, Any, Tuple, Optional" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def class_hierarchy(cls: Type) -> List[Type]:\n", - "    superclasses = cls.mro()\n", - "    hierarchy = []\n", - "    last_superclass_name = \"\"\n", - "\n", - "    for superclass in superclasses:\n", - "        if superclass.__name__ != last_superclass_name:\n", - "            hierarchy.append(superclass)\n", - "            last_superclass_name = superclass.__name__\n", - "\n", - "    return hierarchy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here's an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class A_Class:\n", - "    \"\"\"A Class which does A thing right.\n", - "    Comes with a longer docstring.\"\"\"\n", - "\n", - "    def foo(self) -> None:\n", - "        \"\"\"The Adventures of the glorious Foo\"\"\"\n", - "        pass\n", - "\n", - "    def quux(self) -> None:\n", - "        \"\"\"A method that is not used.\"\"\"\n", - "        pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class A_Class(A_Class):\n", - "    # We define another function in a separate cell.\n", - "\n", - "    def second(self) -> None:\n", - "        pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class B_Class(A_Class):\n", - "    \"\"\"A subclass inheriting some methods.\"\"\"\n", - "\n", - "    VAR = \"A variable\"\n", - "\n", - "    def foo(self) -> None:\n", - "        \"\"\"A WW2 foo fighter.\"\"\"\n", - "        pass\n", - "\n", - "    def bar(self, qux: Any = None, bartender: int = 42) -> None:\n", - "        \"\"\"A qux walks into a bar.\n", - "        `bartender` is an optional attribute.\"\"\"\n", - "        pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class C_Class:\n", - "    \"\"\"A class injecting some method\"\"\"\n", - "\n", - "    def qux(self) -> None:\n", - "        pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class D_Class(B_Class, C_Class):\n", - "    \"\"\"A subclass inheriting from multiple superclasses.\n", - "    Comes with a fairly long, but meaningless documentation.\"\"\"\n", - "\n", - "    def foo(self) -> None:\n", - "        B_Class.foo(self)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class D_Class(D_Class):\n", - "    pass  # An incremental addition that should not impact D's semantics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_hierarchy(D_Class)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Getting a Class Tree" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can use `__bases__` to obtain the immediate base classes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "D_Class.__bases__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`class_tree()` returns a class tree, using the \"lowest\" (most specialized) class with the same name." 
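For the running example, the tree one would expect from the implementation below is sketched here by hand (worth double-checking in a live kernel, since the nesting is subtle): the redefined, most specialized `D_Class` appears as the root of each branch, while the branch structure itself comes from the bases of the original `D_Class`.

```python
class_tree(D_Class)
# Hand-derived expectation (one (class, subtree) pair per base of the original D_Class):
# [(D_Class, [(B_Class, [(A_Class, [])])]),
#  (D_Class, [(C_Class, [])])]
```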
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def class_tree(cls: Type, lowest: Optional[Type] = None) -> List[Tuple[Type, List]]:\n", - "    ret = []\n", - "    for base in cls.__bases__:\n", - "        if base.__name__ == cls.__name__:\n", - "            if not lowest:\n", - "                lowest = cls\n", - "            ret += class_tree(base, lowest)\n", - "        else:\n", - "            if lowest:\n", - "                cls = lowest\n", - "            ret.append((cls, class_tree(base)))\n", - "\n", - "    return ret" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_tree(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_tree(D_Class)[0][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert class_tree(D_Class)[0][0] == D_Class" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`class_set()` flattens the tree into a set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def class_set(classes: Union[Type, List[Type]]) -> Set[Type]:\n", - "    if not isinstance(classes, list):\n", - "        classes = [classes]\n", - "\n", - "    ret = set()\n", - "\n", - "    def traverse_tree(tree: List[Tuple[Type, List]]) -> None:\n", - "        for (cls, subtrees) in tree:\n", - "            ret.add(cls)\n", - "            traverse_tree(subtrees)\n", - "\n", - "    for cls in classes:\n", - "        traverse_tree(class_tree(cls))\n", - "\n", - "    return ret" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_set(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert A_Class in class_set(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert B_Class in class_set(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert C_Class in class_set(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert D_Class in class_set(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_set([B_Class, C_Class])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Getting Docs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "A_Class.__doc__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "A_Class.__bases__[0].__doc__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "A_Class.__bases__[0].__name__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "D_Class.foo" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "D_Class.foo.__doc__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "A_Class.foo.__doc__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def 
docstring(obj: Any) -> str:\n", - "    doc = inspect.getdoc(obj)\n", - "    return doc if doc else \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "docstring(A_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "docstring(D_Class.foo)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def unknown() -> None:\n", - "    pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "docstring(unknown)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def escape(text: str) -> str:\n", - "    text = html.escape(text)\n", - "    assert '<' not in text\n", - "    assert '>' not in text\n", - "    text = text.replace('{', '&#x7b;')\n", - "    text = text.replace('|', '&#x7c;')\n", - "    text = text.replace('}', '&#x7d;')\n", - "    return text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "escape(\"f(foo={})\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def escape_doc(docstring: str) -> str:\n", - "    DOC_INDENT = 0\n", - "    docstring = \"&#x0a;\".join(\n", - "        ' ' * DOC_INDENT + escape(line).strip()\n", - "        for line in docstring.split('\\n')\n", - "    )\n", - "    return docstring" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(escape_doc(\"'Hello\\n    {You|Me}'\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Getting Methods and Variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "inspect.getmembers(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def class_items(cls: Type, pred: Callable) -> List[Tuple[str, Any]]:\n", - "    def _class_items(cls: Type) -> List:\n", - "        all_items = inspect.getmembers(cls, pred)\n", - "        for base in cls.__bases__:\n", - "            all_items += _class_items(base)\n", - "\n", - "        return all_items\n", - "\n", - "    unique_items = []\n", - "    items_seen = set()\n", - "    for (name, item) in _class_items(cls):\n", - "        if name not in items_seen:\n", - "            unique_items.append((name, item))\n", - "            items_seen.add(name)\n", - "\n", - "    return unique_items" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def class_methods(cls: Type) -> List[Tuple[str, Callable]]:\n", - "    return class_items(cls, inspect.isfunction)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def defined_in(name: str, cls: Type) -> bool:\n", - "    if not hasattr(cls, name):\n", - "        return False\n", - "\n", - "    defining_classes = []\n", - "\n", - "    def search_superclasses(name: str, cls: Type) -> None:\n", - "        if not hasattr(cls, name):\n", - "            return\n", - "\n", - "        for base in cls.__bases__:\n", - 
" if hasattr(base, name):\n", - " defining_classes.append(base)\n", - " search_superclasses(name, base)\n", - "\n", - " search_superclasses(name, cls)\n", - "\n", - " if any(cls.__name__ != c.__name__ for c in defining_classes):\n", - " return False # Already defined in superclass\n", - "\n", - " return True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert not defined_in('VAR', A_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert defined_in('VAR', B_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert not defined_in('VAR', C_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert not defined_in('VAR', D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def class_vars(cls: Type) -> List[Any]:\n", - " def is_var(item: Any) -> bool:\n", - " return not callable(item)\n", - "\n", - " return [item for item in class_items(cls, is_var) \n", - " if not item[0].startswith('__') and defined_in(item[0], cls)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_methods(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class_vars(B_Class)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We're only interested in \n", - "\n", - "* functions _defined_ in that class\n", - "* functions that come with a docstring" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def public_class_methods(cls: Type) -> List[Tuple[str, Callable]]:\n", - " return [(name, method) for (name, method) in class_methods(cls) \n", - " if method.__qualname__.startswith(cls.__name__)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def doc_class_methods(cls: Type) -> List[Tuple[str, Callable]]:\n", - " return [(name, method) for (name, method) in public_class_methods(cls) \n", - " if docstring(method) is not None]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "public_class_methods(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "doc_class_methods(D_Class)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def overloaded_class_methods(classes: Union[Type, List[Type]]) -> Set[str]:\n", - " all_methods: Dict[str, Set[Callable]] = {}\n", - " for cls in class_set(classes):\n", - " for (name, method) in class_methods(cls):\n", - " if method.__qualname__.startswith(cls.__name__):\n", - " all_methods.setdefault(name, set())\n", - " all_methods[name].add(cls)\n", - "\n", - " return set(name for name in all_methods if len(all_methods[name]) >= 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "overloaded_class_methods(D_Class)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Drawing Class Hierarchy with Method Names" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [ - "from inspect import signature" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import warnings" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def display_class_hierarchy(classes: Union[Type, List[Type]], \n", - " public_methods: Optional[List] = None,\n", - " abstract_classes: Optional[List] = None,\n", - " include_methods: bool = True,\n", - " include_class_vars: bool =True,\n", - " include_legend: bool = True,\n", - " project: str = 'fuzzingbook',\n", - " log: bool = False) -> Any:\n", - " \"\"\"Visualize a class hierarchy.\n", - "`classes` is a Python class (or a list of classes) to be visualized.\n", - "`public_methods`, if given, is a list of methods to be shown as \"public\" (bold).\n", - " (Default: all methods with a docstring)\n", - "`abstract_classes`, if given, is a list of classes to be shown as \"abstract\" (cursive).\n", - " (Default: all classes with an abstract method)\n", - "`include_methods`: if True, include all methods (default)\n", - "`include_legend`: if True, include a legend (default)\n", - " \"\"\"\n", - " from graphviz import Digraph\n", - "\n", - " if project == 'debuggingbook':\n", - " CLASS_FONT = 'Raleway, Helvetica, Arial, sans-serif'\n", - " CLASS_COLOR = '#6A0DAD' # HTML 'purple'\n", - " else:\n", - " CLASS_FONT = 'Patua One, Helvetica, sans-serif'\n", - " CLASS_COLOR = '#B03A2E'\n", - "\n", - " METHOD_FONT = \"'Fira Mono', 'Source Code Pro', 'Courier', monospace\"\n", - " METHOD_COLOR = 'black'\n", - "\n", - " if isinstance(classes, list):\n", - " starting_class = classes[0]\n", - " else:\n", - " starting_class = classes\n", - " classes = [starting_class]\n", - "\n", - " title = starting_class.__name__ + \" class hierarchy\"\n", - "\n", - " dot = Digraph(comment=title)\n", - " dot.attr('node', shape='record', fontname=CLASS_FONT)\n", - " dot.attr('graph', rankdir='BT', tooltip=title)\n", - " dot.attr('edge', arrowhead='empty')\n", - " edges = set()\n", - " overloaded_methods: Set[str] = set()\n", - "\n", - " drawn_classes = set()\n", - "\n", - " def method_string(method_name: str, public: bool, overloaded: bool,\n", - " fontsize: float = 10.0) -> str:\n", - " method_string = f''\n", - "\n", - " if overloaded:\n", - " name = f'{method_name}()'\n", - " else:\n", - " name = f'{method_name}()'\n", - "\n", - " if public:\n", - " method_string += f'{name}'\n", - " else:\n", - " method_string += f'' \\\n", - " f'{name}'\n", - "\n", - " method_string += ''\n", - " return method_string\n", - "\n", - " def var_string(var_name: str, fontsize: int = 10) -> str:\n", - " var_string = f''\n", - " var_string += f'{var_name}'\n", - " var_string += ''\n", - " return var_string\n", - "\n", - " def is_overloaded(method_name: str, f: Any) -> bool:\n", - " return (method_name in overloaded_methods or\n", - " (docstring(f) is not None and \"in subclasses\" in docstring(f)))\n", - "\n", - " def is_abstract(cls: Type) -> bool:\n", - " if not abstract_classes:\n", - " return inspect.isabstract(cls)\n", - "\n", - " return (cls in abstract_classes or\n", - " any(c.__name__ == cls.__name__ for c in abstract_classes))\n", - "\n", - " def is_public(method_name: str, f: Any) -> bool:\n", - " if public_methods:\n", - " return (method_name in public_methods or\n", - " f in public_methods or\n", - " any(f.__qualname__ == m.__qualname__\n", - " for m in public_methods))\n", - "\n", - " return 
bool(docstring(f))\n", - "\n", - " def class_vars_string(cls: Type, url: str) -> str:\n", - " cls_vars = class_vars(cls)\n", - " if len(cls_vars) == 0:\n", - " return \"\"\n", - "\n", - " vars_string = f''\n", - "\n", - " for (name, var) in cls_vars:\n", - " if log:\n", - " print(f\" Drawing {name}\")\n", - "\n", - " var_doc = escape(f\"{name} = {repr(var)}\")\n", - " tooltip = f' tooltip=\"{var_doc}\"'\n", - " href = f' href=\"{url}\"'\n", - " vars_string += f''\n", - "\n", - " vars_string += '
'\n", - "\n", - " vars_string += var_string(name)\n", - " vars_string += '
'\n", - " return vars_string\n", - "\n", - " def class_methods_string(cls: Type, url: str) -> str:\n", - " methods = public_class_methods(cls)\n", - " # return \"
\".join([name + \"()\" for (name, f) in methods])\n", - " if len(methods) == 0:\n", - " return \"\"\n", - "\n", - " methods_string = f''\n", - "\n", - " for public in [True, False]:\n", - " for (name, f) in methods:\n", - " if public != is_public(name, f):\n", - " continue\n", - "\n", - " if log:\n", - " print(f\" Drawing {name}()\")\n", - "\n", - " if is_public(name, f) and not docstring(f):\n", - " warnings.warn(f\"{f.__qualname__}() is listed as public,\"\n", - " f\" but has no docstring\")\n", - "\n", - " overloaded = is_overloaded(name, f)\n", - "\n", - " method_doc = escape(name + str(inspect.signature(f)))\n", - " if docstring(f):\n", - " method_doc += \": \" + escape_doc(docstring(f))\n", - "\n", - " # Tooltips are only shown if a href is present, too\n", - " tooltip = f' tooltip=\"{method_doc}\"'\n", - " href = f' href=\"{url}\"'\n", - " methods_string += f''\n", - "\n", - " methods_string += '
'\n", - "\n", - " methods_string += method_string(name, public, overloaded)\n", - "\n", - " methods_string += '
'\n", - " return methods_string\n", - "\n", - " def display_class_node(cls: Type) -> None:\n", - " name = cls.__name__\n", - "\n", - " if name in drawn_classes:\n", - " return\n", - " drawn_classes.add(name)\n", - "\n", - " if log:\n", - " print(f\"Drawing class {name}\")\n", - "\n", - " if cls.__module__ == '__main__':\n", - " url = '#'\n", - " else:\n", - " url = cls.__module__ + '.ipynb'\n", - "\n", - " if is_abstract(cls):\n", - " formatted_class_name = f'{cls.__name__}'\n", - " else:\n", - " formatted_class_name = cls.__name__\n", - "\n", - " if include_methods or include_class_vars:\n", - " vars = class_vars_string(cls, url)\n", - " methods = class_methods_string(cls, url)\n", - " spec = '<{' + \\\n", - " formatted_class_name + ''\n", - " if include_class_vars and vars:\n", - " spec += '|' + vars\n", - " if include_methods and methods:\n", - " spec += '|' + methods\n", - " spec += '}>'\n", - " else:\n", - " spec = '<' + formatted_class_name + '>'\n", - "\n", - " class_doc = escape('class ' + cls.__name__)\n", - " if docstring(cls):\n", - " class_doc += ': ' + escape_doc(docstring(cls))\n", - " else:\n", - " warnings.warn(f\"Class {cls.__name__} has no docstring\")\n", - "\n", - " dot.node(name, spec, tooltip=class_doc, href=url)\n", - "\n", - " def display_class_trees(trees: List[Tuple[Type, List]]) -> None:\n", - " for tree in trees:\n", - " (cls, subtrees) = tree\n", - " display_class_node(cls)\n", - "\n", - " for subtree in subtrees:\n", - " (subcls, _) = subtree\n", - "\n", - " if (cls.__name__, subcls.__name__) not in edges:\n", - " dot.edge(cls.__name__, subcls.__name__)\n", - " edges.add((cls.__name__, subcls.__name__))\n", - "\n", - " display_class_trees(subtrees)\n", - "\n", - " def display_legend() -> None:\n", - " fontsize = 8.0\n", - "\n", - " label = f'Legend
' \n", - "\n", - " for item in [\n", - " method_string(\"public_method\",\n", - " public=True, overloaded=False, fontsize=fontsize),\n", - " method_string(\"private_method\",\n", - " public=False, overloaded=False, fontsize=fontsize),\n", - " method_string(\"overloaded_method\",\n", - " public=False, overloaded=True, fontsize=fontsize)\n", - " ]:\n", - " label += '• ' + item + '
'\n", - "\n", - " label += f'' \\\n", - " 'Hover over names to see doc' \\\n", - " '
'\n", - "\n", - " dot.node('Legend', label=f'<{label}>', shape='plain', fontsize=str(fontsize + 2))\n", - "\n", - " for cls in classes:\n", - " tree = class_tree(cls)\n", - " overloaded_methods = overloaded_class_methods(cls)\n", - " display_class_trees(tree)\n", - "\n", - " if include_legend:\n", - " display_legend()\n", - "\n", - " return dot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display_class_hierarchy(D_Class, project='debuggingbook', log=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display_class_hierarchy(D_Class, project='fuzzingbook')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is a variant with abstract classes and logging:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display_class_hierarchy([A_Class, B_Class],\n", - " abstract_classes=[A_Class],\n", - " public_methods=[\n", - " A_Class.quux,\n", - " ], log=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synopsis" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The function `display_class_hierarchy()` function shows the class hierarchy for the given class (or list of classes). \n", - "* The keyword parameter `public_methods`, if given, is a list of \"public\" methods to be used by clients (default: all methods with docstrings).\n", - "* The keyword parameter `abstract_classes`, if given, is a list of classes to be displayed as \"abstract\" (i.e. with a cursive class name)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display_class_hierarchy(D_Class, abstract_classes=[A_Class])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Exercises" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Enjoy!" - ] - } - ], - "metadata": { - "ipub": { - "bibliography": "fuzzingbook.bib", - "toc": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - }, - "toc-autonumbering": false - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/ClassDiagram.ipynb b/notebooks/ClassDiagram.ipynb new file mode 120000 index 000000000..7ec75c709 --- /dev/null +++ b/notebooks/ClassDiagram.ipynb @@ -0,0 +1 @@ +shared/ClassDiagram.ipynb \ No newline at end of file diff --git a/notebooks/ExpectError.ipynb b/notebooks/ExpectError.ipynb deleted file mode 100644 index 65d158175..000000000 --- a/notebooks/ExpectError.ipynb +++ /dev/null @@ -1,626 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "# Error Handling\n", - "\n", - "The code in this notebook helps with handling errors. 
Normally, an error in notebook code causes the execution of the code to stop; while an infinite loop in notebook code causes the notebook to run without end. This notebook provides two classes to help address these concerns." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "**Prerequisites**\n", - "\n", - "* This notebook needs some understanding on advanced concepts in Python, notably \n", - " * classes\n", - " * the Python `with` statement\n", - " * tracing\n", - " * measuring time\n", - " * exceptions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synopsis\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "The `ExpectError` class allows you to catch and report exceptions, yet resume execution. This is useful in notebooks, as they would normally interrupt execution as soon as an exception is raised. Its typical usage is in conjunction with a `with` clause:\n", - "\n", - "```python\n", - "with ExpectError():\n", - " x = 1 / 0\n", - "```\n", - "```python\n", - "=> Traceback (most recent call last):\n", - " File \"\", line 2, in \n", - " x = 1 / 0\n", - "ZeroDivisionError: division by zero (expected)\n", - "\n", - "```\n", - "The `ExpectTimeout` class allows you to interrupt execution after the specified time. This is useful for interrupting code that might otherwise run forever.\n", - "\n", - "```python\n", - "with ExpectTimeout(5):\n", - " long_running_test()\n", - "```\n", - "```python\n", - "=> Start\n", - "0 seconds have passed\n", - "1 seconds have passed\n", - "2 seconds have passed\n", - "3 seconds have passed\n", - "\n", - "Traceback (most recent call last):\n", - " File \"\", line 2, in \n", - " long_running_test()\n", - " File \"\", line 5, in long_running_test\n", - " print(i, \"seconds have passed\")\n", - " File \"\", line 5, in long_running_test\n", - " print(i, \"seconds have passed\")\n", - " File \"\", line 16, in check_time\n", - " raise TimeoutError\n", - "TimeoutError (expected)\n", - "\n", - "```\n", - "The exception and the associated traceback are printed as error messages. If you do not want that, \n", - "use these keyword options:\n", - "\n", - "* `print_traceback` (default True) can be set to `False` to avoid the traceback being printed\n", - "* `mute` (default False) can be set to `True` to completely avoid any output.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Catching Errors\n", - "\n", - "The class `ExpectError` allows to express that some code produces an exception. A typical usage looks as follows:\n", - "\n", - "```Python\n", - "from ExpectError import ExpectError\n", - "\n", - "with ExpectError():\n", - " function_that_is_supposed_to_fail()\n", - "```\n", - "\n", - "If an exception occurs, it is printed on standard error; yet, execution continues." 
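The trick that lets execution resume is the context-manager protocol itself: if `__exit__()` returns a true value, Python suppresses the pending exception. Here is a minimal sketch of just this mechanism (a toy stand-in, not the `ExpectError` implementation that follows):

```python
class Suppress:
    def __enter__(self) -> 'Suppress':
        return self

    def __exit__(self, exc_type, exc_value, tb) -> bool:
        return True  # a true return value tells Python to swallow the exception

with Suppress():
    raise ValueError("this would normally abort the notebook")
print("execution continues")  # reached, as the exception was suppressed
```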
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - }, - "slideshow": { - "slide_type": "skip" - } - }, - "outputs": [], - "source": [ - "import bookutils" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "import traceback\n", - "import sys" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from types import FrameType, TracebackType\n", - "from typing import Union, Optional, Callable, Any" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "class ExpectError:\n", - " \"\"\"Execute a code block expecting (and catching) an error.\"\"\"\n", - "\n", - " def __init__(self, exc_type: Optional[type] = None, \n", - " print_traceback: bool = True, mute: bool = False):\n", - " \"\"\"\n", - " Constructor. Expect an exception of type `exc_type` (`None`: any exception).\n", - " If `print_traceback` is set (default), print a traceback to stderr.\n", - " If `mute` is set (default: False), do not print anything.\n", - " \"\"\"\n", - " self.print_traceback = print_traceback\n", - " self.mute = mute\n", - " self.expected_exc_type = exc_type\n", - "\n", - " def __enter__(self) -> Any:\n", - " \"\"\"Begin of `with` block\"\"\"\n", - " return self\n", - "\n", - " def __exit__(self, exc_type: type, \n", - " exc_value: BaseException, tb: TracebackType) -> Optional[bool]:\n", - " \"\"\"End of `with` block\"\"\"\n", - " if exc_type is None:\n", - " # No exception\n", - " return\n", - "\n", - " if (self.expected_exc_type is not None\n", - " and exc_type != self.expected_exc_type):\n", - " raise # Unexpected exception\n", - "\n", - " # An exception occurred\n", - " if self.print_traceback:\n", - " lines = ''.join(\n", - " traceback.format_exception(\n", - " exc_type,\n", - " exc_value,\n", - " tb)).strip()\n", - " else:\n", - " lines = traceback.format_exception_only(\n", - " exc_type, exc_value)[-1].strip()\n", - "\n", - " if not self.mute:\n", - " print(lines, \"(expected)\", file=sys.stderr)\n", - " return True # Ignore it" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "Here's an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "def fail_test() -> None:\n", - " # Trigger an exception\n", - " x = 1 / 0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "with ExpectError():\n", - " fail_test()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "with ExpectError(print_traceback=False):\n", - " fail_test()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can specify the type of the expected exception. 
This way, if something else happens, we will get notified." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with ExpectError(ZeroDivisionError):\n", - "    fail_test()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with ExpectError():\n", - "    with ExpectError(ZeroDivisionError):\n", - "        some_nonexisting_function()  # type: ignore" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Catching Timeouts\n", - "\n", - "The class `ExpectTimeout(seconds)` allows you to express that some code may run for a long or infinite time; execution is thus interrupted after `seconds` seconds. A typical usage looks as follows:\n", - "\n", - "```Python\n", - "from ExpectError import ExpectTimeout\n", - "\n", - "with ExpectTimeout(2) as t:\n", - "    function_that_is_supposed_to_hang()\n", - "```\n", - "\n", - "If an exception occurs, it is printed on standard error (as with `ExpectError`); yet, execution continues.\n", - "\n", - "Should there be a need to cancel the timeout within the `with` block, `t.cancel()` will do the trick.\n", - "\n", - "The implementation uses `sys.settrace()`, as this seems to be the most portable way to implement timeouts. It is not very efficient, though. Also, it only works on individual lines of Python code and will not interrupt a long-running system function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "import sys\n", - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "class ExpectTimeout:\n", - "    \"\"\"Execute a code block expecting (and catching) a timeout.\"\"\"\n", - "\n", - "    def __init__(self, seconds: Union[int, float], \n", - "                 print_traceback: bool = True, mute: bool = False):\n", - "        \"\"\"\n", - "        Constructor. Interrupt execution after `seconds` seconds.\n", - "        If `print_traceback` is set (default), print a traceback to stderr.\n", - "        If `mute` is set (default: False), do not print anything.\n", - "        \"\"\"\n", - "\n", - "        self.seconds_before_timeout = seconds\n", - "        self.original_trace_function: Optional[Callable] = None\n", - "        self.end_time: Optional[float] = None\n", - "        self.print_traceback = print_traceback\n", - "        self.mute = mute\n", - "\n", - "    def check_time(self, frame: FrameType, event: str, arg: Any) -> Callable:\n", - "        \"\"\"Tracing function\"\"\"\n", - "        if self.original_trace_function is not None:\n", - "            self.original_trace_function(frame, event, arg)\n", - "\n", - "        current_time = time.time()\n", - "        if self.end_time and current_time >= self.end_time:\n", - "            raise TimeoutError\n", - "\n", - "        return self.check_time\n", - "\n", - "    def __enter__(self) -> Any:\n", - "        \"\"\"Begin of `with` block\"\"\"\n", - "\n", - "        start_time = time.time()\n", - "        self.end_time = start_time + self.seconds_before_timeout\n", - "\n", - "        self.original_trace_function = sys.gettrace()\n", - "        sys.settrace(self.check_time)\n", - "        return self\n", - "\n", - "    def __exit__(self, exc_type: type, \n", - "                 exc_value: BaseException, tb: TracebackType) -> Optional[bool]:\n", - "        \"\"\"End of `with` block\"\"\"\n", - "\n", - "        self.cancel()\n", - "\n", - "        if exc_type is None:\n", - "            return\n", - "\n", - "        # An exception occurred\n", - "        if self.print_traceback:\n", - "            lines = ''.join(\n", - "                traceback.format_exception(\n", - "                    exc_type,\n", - "                    exc_value,\n", - "                    tb)).strip()\n", - "        else:\n", - "            lines = traceback.format_exception_only(\n", - "                exc_type, exc_value)[-1].strip()\n", - "\n", - "        if not self.mute:\n", - "            print(lines, \"(expected)\", file=sys.stderr)\n", - "        return True  # Ignore it\n", - "\n", - "    def cancel(self) -> None:\n", - "        sys.settrace(self.original_trace_function)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "Here's an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "def long_running_test() -> None:\n", - "    print(\"Start\")\n", - "    for i in range(10):\n", - "        time.sleep(1)\n", - "        print(i, \"seconds have passed\")\n", - "    print(\"End\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "with ExpectTimeout(5, print_traceback=False):\n", - "    long_running_test()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "Note that it is possible to nest multiple timeouts." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "with ExpectTimeout(5):\n", - "    with ExpectTimeout(3):\n", - "        long_running_test()\n", - "    long_running_test()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "That's it, folks – enjoy!" 
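The limitation noted earlier (the trace function only checks the clock between Python lines) can be made concrete with a sketch like the following (hypothetical, assuming the definitions above; not actual notebook output):

```python
with ExpectTimeout(1):
    time.sleep(5)            # blocks for the full 5 seconds; no check runs meanwhile
    print("never reached")   # TimeoutError fires at the next traced line instead
```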
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synopsis\n", - "\n", - "The `ExpectError` class allows you to catch and report exceptions, yet resume execution. This is useful in notebooks, as they would normally interrupt execution as soon as an exception is raised. Its typical usage is in conjunction with a `with` clause:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with ExpectError():\n", - " x = 1 / 0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `ExpectTimeout` class allows you to interrupt execution after the specified time. This is useful for interrupting code that might otherwise run forever." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with ExpectTimeout(5):\n", - " long_running_test()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The exception and the associated traceback are printed as error messages. If you do not want that, \n", - "use these keyword options:\n", - "\n", - "* `print_traceback` (default True) can be set to `False` to avoid the traceback being printed\n", - "* `mute` (default False) can be set to `True` to completely avoid any output." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": true, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Lessons Learned\n", - "\n", - "* With the `ExpectError` class, it is very easy to handle errors without interrupting notebook execution." - ] - } - ], - "metadata": { - "ipub": { - "bibliography": "fuzzingbook.bib", - "toc": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - }, - "toc-autonumbering": false - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/ExpectError.ipynb b/notebooks/ExpectError.ipynb new file mode 120000 index 000000000..dd639d2e3 --- /dev/null +++ b/notebooks/ExpectError.ipynb @@ -0,0 +1 @@ +shared/ExpectError.ipynb \ No newline at end of file diff --git a/notebooks/Timer.ipynb b/notebooks/Timer.ipynb deleted file mode 100644 index 73034a6a7..000000000 --- a/notebooks/Timer.ipynb +++ /dev/null @@ -1,323 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "# Timer\n", - "\n", - "The code in this notebook helps with measuring time." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "**Prerequisites**\n", - "\n", - "* This notebook needs some understanding on advanced concepts in Python, notably \n", - " * classes\n", - " * the Python `with` statement\n", - " * measuring time" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synopsis\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "The `Timer` class allows you to measure elapsed real time. Its typical usage is in conjunction with a `with` clause:\n", - "\n", - "```python\n", - "with Timer() as t:\n", - " some_long_running_function()\n", - "t.elapsed_time()\n", - "```\n", - "```python\n", - "=> 0.042843673028983176\n", - "```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Measuring Time\n", - "\n", - "The class `Timer` allows to measure the elapsed time during some code execution." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - }, - "slideshow": { - "slide_type": "skip" - } - }, - "outputs": [], - "source": [ - "import bookutils" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "def clock() -> float:\n", - " try:\n", - " return time.perf_counter() # Python 3\n", - " except:\n", - " return time.clock() # Python 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from types import FrameType, TracebackType\n", - "from typing import Type, Any" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "class Timer(object):\n", - " def __enter__(self) -> Any:\n", - " \"\"\"Begin of `with` block\"\"\"\n", - " self.start_time = clock()\n", - " self.end_time = None\n", - " return self\n", - "\n", - " def __exit__(self, exc_type: Type, exc_value: BaseException,\n", - " tb: TracebackType) -> None:\n", - " \"\"\"End of `with` block\"\"\"\n", - " self.end_time = clock() # type: ignore\n", - "\n", - " def elapsed_time(self) -> float:\n", - " \"\"\"Return elapsed time in seconds\"\"\"\n", - " if self.end_time is None:\n", - " # still running\n", - " return clock() - self.start_time\n", - " else:\n", - " return self.end_time - self.start_time # type: ignore" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "Here's an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "def some_long_running_function() -> None:\n", - " i = 1000000\n", - " while i > 0:\n", - " i -= 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": 
false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "print(\"Stopping total time:\")\n", - "with Timer() as t:\n", - " some_long_running_function()\n", - "print(t.elapsed_time())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "outputs": [], - "source": [ - "print(\"Stopping time in between:\")\n", - "with Timer() as t:\n", - " for i in range(10):\n", - " print(t.elapsed_time())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": false, - "run_control": { - "read_only": false - } - }, - "source": [ - "That's it, folks – enjoy!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synopsis\n", - "\n", - "The `Timer` class allows you to measure elapsed real time. Its typical usage is in conjunction with a `with` clause:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with Timer() as t:\n", - " some_long_running_function()\n", - "t.elapsed_time()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "button": false, - "new_sheet": true, - "run_control": { - "read_only": false - } - }, - "source": [ - "## Lessons Learned\n", - "\n", - "* With the `Timer` class, it is very easy to measure elapsed time." - ] - } - ], - "metadata": { - "ipub": { - "bibliography": "fuzzingbook.bib", - "toc": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - }, - "toc-autonumbering": false - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/Timer.ipynb b/notebooks/Timer.ipynb new file mode 120000 index 000000000..5582fd5cf --- /dev/null +++ b/notebooks/Timer.ipynb @@ -0,0 +1 @@ +shared/Timer.ipynb \ No newline at end of file diff --git a/notebooks/bookutils b/notebooks/bookutils new file mode 120000 index 000000000..1ddd65e4c --- /dev/null +++ b/notebooks/bookutils @@ -0,0 +1 @@ +shared/bookutils \ No newline at end of file diff --git a/notebooks/bookutils/import_notebooks.py b/notebooks/bookutils/import_notebooks.py deleted file mode 100755 index 9452c27bb..000000000 --- a/notebooks/bookutils/import_notebooks.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python -# Settings and definitions for fuzzingbook notebooks - -# We want to import notebooks as modules -# Source: http://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Importing%20Notebooks.html - -import io, os, sys, types, re -from IPython import get_ipython -from nbformat import read -from IPython.core.interactiveshell import InteractiveShell -from importlib.abc import MetaPathFinder - -import linecache -import ast - -from typing import Optional, List, Any, Dict - -# To avoid re-running notebook computations during import, -# we only import code cells that match this regular expression -# i.e. 
definitions of -# * functions: `def func()` -# * classes: `class X:` -# * constants: `UPPERCASE_VARIABLES` -# * types: `TypeVariables`, and -# * imports: `import foo` -RE_CODE = re.compile(r"^(def |class |@|[A-Z][A-Za-z0-9_]+ [-+*/]?= |[A-Z][A-Za-z0-9_]+[.:]|import |from )") - -def do_import(code: str) -> bool: - """Return True if code is to be exported""" - while code.startswith('#') or code.startswith('\n'): - # Skip leading comments - code = code[code.find('\n') + 1:] - - return RE_CODE.match(code) is not None - -assert do_import("def foo():\n pass") -assert do_import("# ignore\ndef foo():\n pass") -assert do_import("# ignore\nclass Bar:\n pass") -assert do_import("XYZ = 123") -assert not do_import("xyz = 123") -assert not do_import("foo()") - -def find_notebook(fullname: str, path: Optional[List[str]] = None) -> Optional[str]: - """find a notebook, given its fully qualified name and an optional path - - This turns "foo.bar" into "foo/bar.ipynb" - and tries turning "Foo_Bar" into "Foo Bar" if Foo_Bar - does not exist. - """ - name = fullname.rsplit('.', 1)[-1] - if not path: - path = sys.path - for d in path: - nb_path = os.path.join(d, name + ".ipynb") - if os.path.isfile(nb_path): - return nb_path - # let import Notebook_Name find "Notebook Name.ipynb" - nb_path = nb_path.replace("_", " ") - if os.path.isfile(nb_path): - return nb_path - - return None - -class NotebookLoader: - """Module Loader for Jupyter Notebooks""" - def __init__(self, path: Optional[List[str]] = None) -> None: - self.shell = InteractiveShell.instance() - self.path = path - self.lines: Dict[str, str] = {} - - def load_module(self, fullname: str) -> types.ModuleType: - self.lines[fullname] = '' - """import a notebook as a module""" - path = find_notebook(fullname, self.path) - if path is None: - raise FileNotFoundError(f"Can't find {fullname}") - - # print ("importing Jupyter notebook from %s" % path) - - # load the notebook object - with io.open(path, 'r', encoding='utf-8') as f: - nb = read(f, 4) - - # create the module and add it to sys.modules - # if name in sys.modules: - # return sys.modules[name] - mod = types.ModuleType(fullname) - mod.__file__ = path - mod.__loader__ = self - mod.__dict__['get_ipython'] = get_ipython - sys.modules[fullname] = mod - - # extra work to ensure that magics that would affect the user_ns - # actually affect the notebook module's ns - save_user_ns = self.shell.user_ns - self.shell.user_ns = mod.__dict__ - - codecells = [self.shell.input_transformer_manager.transform_cell(cell.source) - for cell in nb.cells if cell.cell_type == 'code'] - source = [code for code in codecells if do_import(code)] - - lno = 1 - - try: - for code in source: - parsed = ast.parse(code, filename=path, mode='exec') - ast.increment_lineno(parsed, n=lno - 1) - exec(compile(parsed, path, 'exec'), mod.__dict__) - lno += len(code.split('\n')) - self.lines[fullname] = '\n'.join(source) - p = len(self.lines[fullname].split('\n')) + 1 - assert lno == p - - finally: - self.shell.user_ns = save_user_ns - data = self.lines[fullname] - linecache.cache[path] = (len(data), None, # type: ignore - [line+'\n' for line in data.splitlines()], - fullname) - return mod - -class NotebookFinder(MetaPathFinder): - """Module finder that locates Jupyter Notebooks""" - def __init__(self) -> None: - self.loaders: Dict[Any, Any] = {} - - def find_module(self, fullname: str, path: Any = None) -> Any: - nb_path = find_notebook(fullname, path) - if not nb_path: - return - - key = path - if path: - # lists aren't hashable - key = 
os.path.sep.join(path) - - if key not in self.loaders: - self.loaders[key] = NotebookLoader(path) - return self.loaders[key] - -sys.meta_path.append(NotebookFinder()) diff --git a/notebooks/shared/ClassDiagram.ipynb b/notebooks/shared/ClassDiagram.ipynb new file mode 100644 index 000000000..58136669e --- /dev/null +++ b/notebooks/shared/ClassDiagram.ipynb @@ -0,0 +1,1199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "# Class Diagrams\n", + "\n", + "This is a simple viewer for class diagrams, customized for this book." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "**Prerequisites**\n", + "\n", + "* _Refer to earlier chapters as notebooks here, as here:_ [Earlier Chapter](Debugger.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + }, + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "import bookutils" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synopsis\n", + "\n", + "\n", + "To [use the code provided in this chapter](Importing.ipynb), write\n", + "\n", + "```python\n", + ">>> from fuzzingbook.ClassDiagram import \n", + "```\n", + "\n", + "and then make use of the following features.\n", + "\n", + "\n", + "The `display_class_hierarchy()` function shows the class hierarchy for the given class. Methods with docstrings (intended to be used by the public) are shown in bold.\n", + "\n", + "```python\n", + ">>> display_class_hierarchy(GrammarFuzzer)\n", + "```\n", + "\n", + "![](PICS/ClassDiagram-synopsis-1.svg)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": true, + "run_control": { + "read_only": false + } + }, + "source": [ + "## Getting a Class Hierarchy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import inspect" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using `mro()`, we can access the class hierarchy. We make sure to avoid duplicates created by `class X(X)`."
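, + "\n", + "For instance, redefining a class over itself, as notebook cells often do, makes `mro()` report two distinct classes that share a name (a minimal sketch):\n", + "\n", + "```python\n", + "class X:\n", + "    pass\n", + "\n", + "class X(X):  # extending X in a later cell\n", + "    pass\n", + "\n", + "X.mro()      # lists two classes named 'X'\n", + "```"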
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Callable, Dict, Type, Set, List, Union, Any, Tuple, Optional" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def class_hierarchy(cls: Type) -> List[Type]:\n", + " superclasses = cls.mro()\n", + " hierarchy = []\n", + " last_superclass_name = \"\"\n", + "\n", + " for superclass in superclasses:\n", + " if superclass.__name__ != last_superclass_name:\n", + " hierarchy.append(superclass)\n", + " last_superclass_name = superclass.__name__\n", + "\n", + " return hierarchy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class A_Class:\n", + " \"\"\"A Class which does A thing right.\n", + " Comes with a longer docstring.\"\"\"\n", + "\n", + " def foo(self) -> None:\n", + " \"\"\"The Adventures of the glorious Foo\"\"\"\n", + " pass\n", + "\n", + " def quux(self) -> None:\n", + " \"\"\"A method that is not used.\"\"\"\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class A_Class(A_Class):\n", + " # We define another function in a separate cell.\n", + "\n", + " def second(self) -> None:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class B_Class(A_Class):\n", + " \"\"\"A subclass inheriting some methods.\"\"\"\n", + "\n", + " VAR = \"A variable\"\n", + "\n", + " def foo(self) -> None:\n", + " \"\"\"A WW2 foo fighter.\"\"\"\n", + " pass\n", + "\n", + " def bar(self, qux: Any = None, bartender: int = 42) -> None:\n", + " \"\"\"A qux walks into a bar.\n", + " `bartender` is an optional attribute.\"\"\"\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class C_Class:\n", + " \"\"\"A class injecting some method\"\"\"\n", + "\n", + " def qux(self) -> None:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class D_Class(B_Class, C_Class):\n", + " \"\"\"A subclass inheriting from multiple superclasses.\n", + " Comes with a fairly long, but meaningless documentation.\"\"\"\n", + "\n", + " def foo(self) -> None:\n", + " B_Class.foo(self)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class D_Class(D_Class):\n", + " pass # An incremental addition that should not impact D's semantics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_hierarchy(D_Class)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting a Class Tree" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use `__bases__` to obtain the immediate base classes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "D_Class.__bases__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`class_tree()` returns a class tree, using the \"lowest\" (most specialized) class with the same name."
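, + "\n", + "For example, `class_tree(D_Class)` yields a nested list of `(class, subtrees)` pairs, roughly of this shape (a sketch; the exact entries follow from the definitions above):\n", + "\n", + "```python\n", + "[(D_Class, [(B_Class, [(A_Class, [])])]),\n", + " (D_Class, [(C_Class, [])])]\n", + "```"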
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def class_tree(cls: Type, lowest: Optional[Type] = None) -> List[Tuple[Type, List]]:\n", + " ret = []\n", + " for base in cls.__bases__:\n", + " if base.__name__ == cls.__name__:\n", + " if not lowest:\n", + " lowest = cls\n", + " ret += class_tree(base, lowest)\n", + " else:\n", + " if lowest:\n", + " cls = lowest\n", + " ret.append((cls, class_tree(base)))\n", + "\n", + " return ret" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_tree(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_tree(D_Class)[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert class_tree(D_Class)[0][0] == D_Class" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`class_set()` flattens the tree into a set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def class_set(classes: Union[Type, List[Type]]) -> Set[Type]:\n", + " if not isinstance(classes, list):\n", + " classes = [classes]\n", + "\n", + " ret = set()\n", + "\n", + " def traverse_tree(tree: List[Tuple[Type, List]]) -> None:\n", + " for (cls, subtrees) in tree:\n", + " ret.add(cls)\n", + " traverse_tree(subtrees) # recurse into the subtrees just once\n", + "\n", + " for cls in classes:\n", + " traverse_tree(class_tree(cls))\n", + "\n", + " return ret" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_set(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert A_Class in class_set(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert B_Class in class_set(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert C_Class in class_set(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert D_Class in class_set(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_set([B_Class, C_Class])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Getting Docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "A_Class.__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "A_Class.__bases__[0].__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "A_Class.__bases__[0].__name__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "D_Class.foo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "D_Class.foo.__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "A_Class.foo.__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def 
docstring(obj: Any) -> str:\n", + " doc = inspect.getdoc(obj)\n", + " return doc if doc else \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docstring(A_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docstring(D_Class.foo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def unknown() -> None:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docstring(unknown)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def escape(text: str) -> str:\n", + " text = html.escape(text)\n", + " assert '<' not in text\n", + " assert '>' not in text\n", + " text = text.replace('{', '&#x7b;')\n", + " text = text.replace('|', '&#x7c;')\n", + " text = text.replace('}', '&#x7d;')\n", + " return text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "escape(\"f(foo={})\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def escape_doc(docstring: str) -> str:\n", + " DOC_INDENT = 0\n", + " docstring = \" \".join(\n", + " ' ' * DOC_INDENT + escape(line).strip()\n", + " for line in docstring.split('\\n')\n", + " )\n", + " return docstring" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(escape_doc(\"'Hello\\n {You|Me}'\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "## Getting Methods and Variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inspect.getmembers(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def class_items(cls: Type, pred: Callable) -> List[Tuple[str, Any]]:\n", + " def _class_items(cls: Type) -> List:\n", + " all_items = inspect.getmembers(cls, pred)\n", + " for base in cls.__bases__:\n", + " all_items += _class_items(base)\n", + "\n", + " return all_items\n", + "\n", + " unique_items = []\n", + " items_seen = set()\n", + " for (name, item) in _class_items(cls):\n", + " if name not in items_seen:\n", + " unique_items.append((name, item))\n", + " items_seen.add(name)\n", + "\n", + " return unique_items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def class_methods(cls: Type) -> List[Tuple[str, Callable]]:\n", + " return class_items(cls, inspect.isfunction)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def defined_in(name: str, cls: Type) -> bool:\n", + " if not hasattr(cls, name):\n", + " return False\n", + "\n", + " defining_classes = []\n", + "\n", + " def search_superclasses(name: str, cls: Type) -> None:\n", + " if not hasattr(cls, name):\n", + " return\n", + "\n", + " for base in cls.__bases__:\n", +
" if hasattr(base, name):\n", + " defining_classes.append(base)\n", + " search_superclasses(name, base)\n", + "\n", + " search_superclasses(name, cls)\n", + "\n", + " if any(cls.__name__ != c.__name__ for c in defining_classes):\n", + " return False # Already defined in superclass\n", + "\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert not defined_in('VAR', A_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert defined_in('VAR', B_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert not defined_in('VAR', C_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert not defined_in('VAR', D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def class_vars(cls: Type) -> List[Any]:\n", + " def is_var(item: Any) -> bool:\n", + " return not callable(item)\n", + "\n", + " return [item for item in class_items(cls, is_var) \n", + " if not item[0].startswith('__') and defined_in(item[0], cls)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_methods(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class_vars(B_Class)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're only interested in \n", + "\n", + "* functions _defined_ in that class\n", + "* functions that come with a docstring" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def public_class_methods(cls: Type) -> List[Tuple[str, Callable]]:\n", + " return [(name, method) for (name, method) in class_methods(cls) \n", + " if method.__qualname__.startswith(cls.__name__)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def doc_class_methods(cls: Type) -> List[Tuple[str, Callable]]:\n", + " return [(name, method) for (name, method) in public_class_methods(cls) \n", + " if docstring(method) is not None]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "public_class_methods(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc_class_methods(D_Class)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def overloaded_class_methods(classes: Union[Type, List[Type]]) -> Set[str]:\n", + " all_methods: Dict[str, Set[Callable]] = {}\n", + " for cls in class_set(classes):\n", + " for (name, method) in class_methods(cls):\n", + " if method.__qualname__.startswith(cls.__name__):\n", + " all_methods.setdefault(name, set())\n", + " all_methods[name].add(cls)\n", + "\n", + " return set(name for name in all_methods if len(all_methods[name]) >= 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "overloaded_class_methods(D_Class)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Drawing Class Hierarchy with Method Names" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "from inspect import signature" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def display_class_hierarchy(classes: Union[Type, List[Type]], \n", + " public_methods: Optional[List] = None,\n", + " abstract_classes: Optional[List] = None,\n", + " include_methods: bool = True,\n", + " include_class_vars: bool =True,\n", + " include_legend: bool = True,\n", + " project: str = 'fuzzingbook',\n", + " log: bool = False) -> Any:\n", + " \"\"\"Visualize a class hierarchy.\n", + "`classes` is a Python class (or a list of classes) to be visualized.\n", + "`public_methods`, if given, is a list of methods to be shown as \"public\" (bold).\n", + " (Default: all methods with a docstring)\n", + "`abstract_classes`, if given, is a list of classes to be shown as \"abstract\" (cursive).\n", + " (Default: all classes with an abstract method)\n", + "`include_methods`: if True, include all methods (default)\n", + "`include_legend`: if True, include a legend (default)\n", + " \"\"\"\n", + " from graphviz import Digraph\n", + "\n", + " if project == 'debuggingbook':\n", + " CLASS_FONT = 'Raleway, Helvetica, Arial, sans-serif'\n", + " CLASS_COLOR = '#6A0DAD' # HTML 'purple'\n", + " else:\n", + " CLASS_FONT = 'Patua One, Helvetica, sans-serif'\n", + " CLASS_COLOR = '#B03A2E'\n", + "\n", + " METHOD_FONT = \"'Fira Mono', 'Source Code Pro', 'Courier', monospace\"\n", + " METHOD_COLOR = 'black'\n", + "\n", + " if isinstance(classes, list):\n", + " starting_class = classes[0]\n", + " else:\n", + " starting_class = classes\n", + " classes = [starting_class]\n", + "\n", + " title = starting_class.__name__ + \" class hierarchy\"\n", + "\n", + " dot = Digraph(comment=title)\n", + " dot.attr('node', shape='record', fontname=CLASS_FONT)\n", + " dot.attr('graph', rankdir='BT', tooltip=title)\n", + " dot.attr('edge', arrowhead='empty')\n", + " edges = set()\n", + " overloaded_methods: Set[str] = set()\n", + "\n", + " drawn_classes = set()\n", + "\n", + " def method_string(method_name: str, public: bool, overloaded: bool,\n", + " fontsize: float = 10.0) -> str:\n", + " method_string = f''\n", + "\n", + " if overloaded:\n", + " name = f'{method_name}()'\n", + " else:\n", + " name = f'{method_name}()'\n", + "\n", + " if public:\n", + " method_string += f'{name}'\n", + " else:\n", + " method_string += f'' \\\n", + " f'{name}'\n", + "\n", + " method_string += ''\n", + " return method_string\n", + "\n", + " def var_string(var_name: str, fontsize: int = 10) -> str:\n", + " var_string = f''\n", + " var_string += f'{var_name}'\n", + " var_string += ''\n", + " return var_string\n", + "\n", + " def is_overloaded(method_name: str, f: Any) -> bool:\n", + " return (method_name in overloaded_methods or\n", + " (docstring(f) is not None and \"in subclasses\" in docstring(f)))\n", + "\n", + " def is_abstract(cls: Type) -> bool:\n", + " if not abstract_classes:\n", + " return inspect.isabstract(cls)\n", + "\n", + " return (cls in abstract_classes or\n", + " any(c.__name__ == cls.__name__ for c in abstract_classes))\n", + "\n", + " def is_public(method_name: str, f: Any) -> bool:\n", + " if public_methods:\n", + " return (method_name in public_methods or\n", + " f in public_methods or\n", + " any(f.__qualname__ == m.__qualname__\n", + " for m in public_methods))\n", + "\n", + " return 
bool(docstring(f))\n", + "\n", + " def class_vars_string(cls: Type, url: str) -> str:\n", + " cls_vars = class_vars(cls)\n", + " if len(cls_vars) == 0:\n", + " return \"\"\n", + "\n", + " vars_string = f''\n", + "\n", + " for (name, var) in cls_vars:\n", + " if log:\n", + " print(f\" Drawing {name}\")\n", + "\n", + " var_doc = escape(f\"{name} = {repr(var)}\")\n", + " tooltip = f' tooltip=\"{var_doc}\"'\n", + " href = f' href=\"{url}\"'\n", + " vars_string += f''\n", + "\n", + " vars_string += '
'\n", + "\n", + " vars_string += var_string(name)\n", + " vars_string += '
'\n", + " return vars_string\n", + "\n", + " def class_methods_string(cls: Type, url: str) -> str:\n", + " methods = public_class_methods(cls)\n", + " # return \"
\".join([name + \"()\" for (name, f) in methods])\n", + " if len(methods) == 0:\n", + " return \"\"\n", + "\n", + " methods_string = f''\n", + "\n", + " for public in [True, False]:\n", + " for (name, f) in methods:\n", + " if public != is_public(name, f):\n", + " continue\n", + "\n", + " if log:\n", + " print(f\" Drawing {name}()\")\n", + "\n", + " if is_public(name, f) and not docstring(f):\n", + " warnings.warn(f\"{f.__qualname__}() is listed as public,\"\n", + " f\" but has no docstring\")\n", + "\n", + " overloaded = is_overloaded(name, f)\n", + "\n", + " method_doc = escape(name + str(inspect.signature(f)))\n", + " if docstring(f):\n", + " method_doc += \": \" + escape_doc(docstring(f))\n", + "\n", + " # Tooltips are only shown if a href is present, too\n", + " tooltip = f' tooltip=\"{method_doc}\"'\n", + " href = f' href=\"{url}\"'\n", + " methods_string += f''\n", + "\n", + " methods_string += '
'\n", + "\n", + " methods_string += method_string(name, public, overloaded)\n", + "\n", + " methods_string += '
'\n", + " return methods_string\n", + "\n", + " def display_class_node(cls: Type) -> None:\n", + " name = cls.__name__\n", + "\n", + " if name in drawn_classes:\n", + " return\n", + " drawn_classes.add(name)\n", + "\n", + " if log:\n", + " print(f\"Drawing class {name}\")\n", + "\n", + " if cls.__module__ == '__main__':\n", + " url = '#'\n", + " else:\n", + " url = cls.__module__ + '.ipynb'\n", + "\n", + " if is_abstract(cls):\n", + " formatted_class_name = f'{cls.__name__}'\n", + " else:\n", + " formatted_class_name = cls.__name__\n", + "\n", + " if include_methods or include_class_vars:\n", + " vars = class_vars_string(cls, url)\n", + " methods = class_methods_string(cls, url)\n", + " spec = '<{' + \\\n", + " formatted_class_name + ''\n", + " if include_class_vars and vars:\n", + " spec += '|' + vars\n", + " if include_methods and methods:\n", + " spec += '|' + methods\n", + " spec += '}>'\n", + " else:\n", + " spec = '<' + formatted_class_name + '>'\n", + "\n", + " class_doc = escape('class ' + cls.__name__)\n", + " if docstring(cls):\n", + " class_doc += ': ' + escape_doc(docstring(cls))\n", + " else:\n", + " warnings.warn(f\"Class {cls.__name__} has no docstring\")\n", + "\n", + " dot.node(name, spec, tooltip=class_doc, href=url)\n", + "\n", + " def display_class_trees(trees: List[Tuple[Type, List]]) -> None:\n", + " for tree in trees:\n", + " (cls, subtrees) = tree\n", + " display_class_node(cls)\n", + "\n", + " for subtree in subtrees:\n", + " (subcls, _) = subtree\n", + "\n", + " if (cls.__name__, subcls.__name__) not in edges:\n", + " dot.edge(cls.__name__, subcls.__name__)\n", + " edges.add((cls.__name__, subcls.__name__))\n", + "\n", + " display_class_trees(subtrees)\n", + "\n", + " def display_legend() -> None:\n", + " fontsize = 8.0\n", + "\n", + " label = f'Legend
' \n", + "\n", + " for item in [\n", + " method_string(\"public_method\",\n", + " public=True, overloaded=False, fontsize=fontsize),\n", + " method_string(\"private_method\",\n", + " public=False, overloaded=False, fontsize=fontsize),\n", + " method_string(\"overloaded_method\",\n", + " public=False, overloaded=True, fontsize=fontsize)\n", + " ]:\n", + " label += '• ' + item + '
'\n", + "\n", + " label += f'' \\\n", + " 'Hover over names to see doc' \\\n", + " '
'\n", + "\n", + " dot.node('Legend', label=f'<{label}>', shape='plain', fontsize=str(fontsize + 2))\n", + "\n", + " for cls in classes:\n", + " tree = class_tree(cls)\n", + " overloaded_methods = overloaded_class_methods(cls)\n", + " display_class_trees(tree)\n", + "\n", + " if include_legend:\n", + " display_legend()\n", + "\n", + " return dot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display_class_hierarchy(D_Class, project='debuggingbook', log=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display_class_hierarchy(D_Class, project='fuzzingbook')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is a variant with abstract classes and logging:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display_class_hierarchy([A_Class, B_Class],\n", + " abstract_classes=[A_Class],\n", + " public_methods=[\n", + " A_Class.quux,\n", + " ], log=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synopsis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function `display_class_hierarchy()` function shows the class hierarchy for the given class (or list of classes). \n", + "* The keyword parameter `public_methods`, if given, is a list of \"public\" methods to be used by clients (default: all methods with docstrings).\n", + "* The keyword parameter `abstract_classes`, if given, is a list of classes to be displayed as \"abstract\" (i.e. with a cursive class name)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display_class_hierarchy(D_Class, abstract_classes=[A_Class])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Enjoy!" + ] + } + ], + "metadata": { + "ipub": { + "bibliography": "fuzzingbook.bib", + "toc": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "toc-autonumbering": false + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/shared/ExpectError.ipynb b/notebooks/shared/ExpectError.ipynb new file mode 100644 index 000000000..65d158175 --- /dev/null +++ b/notebooks/shared/ExpectError.ipynb @@ -0,0 +1,626 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "# Error Handling\n", + "\n", + "The code in this notebook helps with handling errors. Normally, an error in notebook code causes the execution of the code to stop; while an infinite loop in notebook code causes the notebook to run without end. This notebook provides two classes to help address these concerns." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "**Prerequisites**\n", + "\n", + "* This notebook needs some understanding of advanced concepts in Python, notably \n", + " * classes\n", + " * the Python `with` statement\n", + " * tracing\n", + " * measuring time\n", + " * exceptions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synopsis\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "The `ExpectError` class allows you to catch and report exceptions, yet resume execution. This is useful in notebooks, as they would normally interrupt execution as soon as an exception is raised. Its typical usage is in conjunction with a `with` clause:\n", + "\n", + "```python\n", + "with ExpectError():\n", + " x = 1 / 0\n", + "```\n", + "```python\n", + "=> Traceback (most recent call last):\n", + " File \"\", line 2, in \n", + " x = 1 / 0\n", + "ZeroDivisionError: division by zero (expected)\n", + "\n", + "```\n", + "The `ExpectTimeout` class allows you to interrupt execution after the specified time. This is useful for interrupting code that might otherwise run forever.\n", + "\n", + "```python\n", + "with ExpectTimeout(5):\n", + " long_running_test()\n", + "```\n", + "```python\n", + "=> Start\n", + "0 seconds have passed\n", + "1 seconds have passed\n", + "2 seconds have passed\n", + "3 seconds have passed\n", + "\n", + "Traceback (most recent call last):\n", + " File \"\", line 2, in \n", + " long_running_test()\n", + " File \"\", line 5, in long_running_test\n", + " print(i, \"seconds have passed\")\n", + " File \"\", line 5, in long_running_test\n", + " print(i, \"seconds have passed\")\n", + " File \"\", line 16, in check_time\n", + " raise TimeoutError\n", + "TimeoutError (expected)\n", + "\n", + "```\n", + "The exception and the associated traceback are printed as error messages. If you do not want that, \n", + "use these keyword options:\n", + "\n", + "* `print_traceback` (default True) can be set to `False` to avoid the traceback being printed\n", + "* `mute` (default False) can be set to `True` to completely avoid any output.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "## Catching Errors\n", + "\n", + "The class `ExpectError` lets you express that some code produces an exception. A typical usage looks as follows:\n", + "\n", + "```Python\n", + "from ExpectError import ExpectError\n", + "\n", + "with ExpectError():\n", + " function_that_is_supposed_to_fail()\n", + "```\n", + "\n", + "If an exception occurs, it is printed on standard error; yet, execution continues."
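, + "\n", + "Conceptually, `ExpectError` is shorthand for a hand-written `try`/`except` block like this simplified sketch:\n", + "\n", + "```python\n", + "import sys\n", + "import traceback\n", + "\n", + "try:\n", + "    function_that_is_supposed_to_fail()\n", + "except Exception:\n", + "    traceback.print_exc(file=sys.stderr)  # report the error, but carry on\n", + "```"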
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + }, + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "import bookutils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "import traceback\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from types import FrameType, TracebackType\n", + "from typing import Union, Optional, Callable, Any" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "class ExpectError:\n", + " \"\"\"Execute a code block expecting (and catching) an error.\"\"\"\n", + "\n", + " def __init__(self, exc_type: Optional[type] = None, \n", + " print_traceback: bool = True, mute: bool = False):\n", + " \"\"\"\n", + " Constructor. Expect an exception of type `exc_type` (`None`: any exception).\n", + " If `print_traceback` is set (default), print a traceback to stderr.\n", + " If `mute` is set (default: False), do not print anything.\n", + " \"\"\"\n", + " self.print_traceback = print_traceback\n", + " self.mute = mute\n", + " self.expected_exc_type = exc_type\n", + "\n", + " def __enter__(self) -> Any:\n", + " \"\"\"Begin of `with` block\"\"\"\n", + " return self\n", + "\n", + " def __exit__(self, exc_type: type, \n", + " exc_value: BaseException, tb: TracebackType) -> Optional[bool]:\n", + " \"\"\"End of `with` block\"\"\"\n", + " if exc_type is None:\n", + " # No exception\n", + " return\n", + "\n", + " if (self.expected_exc_type is not None\n", + " and exc_type != self.expected_exc_type):\n", + " raise # Unexpected exception\n", + "\n", + " # An exception occurred\n", + " if self.print_traceback:\n", + " lines = ''.join(\n", + " traceback.format_exception(\n", + " exc_type,\n", + " exc_value,\n", + " tb)).strip()\n", + " else:\n", + " lines = traceback.format_exception_only(\n", + " exc_type, exc_value)[-1].strip()\n", + "\n", + " if not self.mute:\n", + " print(lines, \"(expected)\", file=sys.stderr)\n", + " return True # Ignore it" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "Here's an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "def fail_test() -> None:\n", + " # Trigger an exception\n", + " x = 1 / 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "with ExpectError():\n", + " fail_test()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "with ExpectError(print_traceback=False):\n", + " fail_test()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can specify the type of the expected exception. 
This way, if something else happens, we will get notified." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with ExpectError(ZeroDivisionError):\n", + " fail_test()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with ExpectError():\n", + " with ExpectError(ZeroDivisionError):\n", + " some_nonexisting_function() # type: ignore" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "## Catching Timeouts\n", + "\n", + "The class `ExpectTimeout(seconds)` lets you express that some code may run for a long or even infinite time; execution is thus interrupted after `seconds` seconds. A typical usage looks as follows:\n", + "\n", + "```Python\n", + "from ExpectError import ExpectTimeout\n", + "\n", + "with ExpectTimeout(2) as t:\n", + " function_that_is_supposed_to_hang()\n", + "```\n", + "\n", + "If an exception occurs, it is printed on standard error (as with `ExpectError`); yet, execution continues.\n", + "\n", + "Should there be a need to cancel the timeout within the `with` block, `t.cancel()` will do the trick; see the sketch at the end of this section.\n", + "\n", + "The implementation uses `sys.settrace()`, as this seems to be the most portable way to implement timeouts. It is not very efficient, though. Also, it only works on individual lines of Python code and will not interrupt a long-running system function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "import sys\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "class ExpectTimeout:\n", + " \"\"\"Execute a code block expecting (and catching) a timeout.\"\"\"\n", + "\n", + " def __init__(self, seconds: Union[int, float], \n", + " print_traceback: bool = True, mute: bool = False):\n", + " \"\"\"\n", + " Constructor. Interrupt execution after `seconds` seconds.\n", + " If `print_traceback` is set (default), print a traceback to stderr.\n", + " If `mute` is set (default: False), do not print anything.\n", + " \"\"\"\n", + "\n", + " self.seconds_before_timeout = seconds\n", + " self.original_trace_function: Optional[Callable] = None\n", + " self.end_time: Optional[float] = None\n", + " self.print_traceback = print_traceback\n", + " self.mute = mute\n", + "\n", + " def check_time(self, frame: FrameType, event: str, arg: Any) -> Callable:\n", + " \"\"\"Tracing function\"\"\"\n", + " if self.original_trace_function is not None:\n", + " self.original_trace_function(frame, event, arg)\n", + "\n", + " current_time = time.time()\n", + " if self.end_time and current_time >= self.end_time:\n", + " raise TimeoutError\n", + "\n", + " return self.check_time\n", + "\n", + " def __enter__(self) -> Any:\n", + " \"\"\"Begin of `with` block\"\"\"\n", + "\n", + " start_time = time.time()\n", + " self.end_time = start_time + self.seconds_before_timeout\n", + "\n", + " self.original_trace_function = sys.gettrace()\n", + " sys.settrace(self.check_time)\n", + " return self\n", + "\n", + " def __exit__(self, exc_type: type, \n", + " exc_value: BaseException, tb: TracebackType) -> Optional[bool]:\n", + " \"\"\"End of `with` block\"\"\"\n", + "\n", + " self.cancel()\n", + "\n", + " if exc_type is None:\n", + " return\n", + "\n", + " # An exception occurred\n", + " if self.print_traceback:\n", + " lines = ''.join(\n", + " traceback.format_exception(\n", + " exc_type,\n", + " exc_value,\n", + " tb)).strip()\n", + " else:\n", + " lines = traceback.format_exception_only(\n", + " exc_type, exc_value)[-1].strip()\n", + "\n", + " if not self.mute:\n", + " print(lines, \"(expected)\", file=sys.stderr)\n", + " return True # Ignore it\n", + "\n", + " def cancel(self) -> None:\n", + " sys.settrace(self.original_trace_function)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "Here's an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "def long_running_test() -> None:\n", + " print(\"Start\")\n", + " for i in range(10):\n", + " time.sleep(1)\n", + " print(i, \"seconds have passed\")\n", + " print(\"End\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "with ExpectTimeout(5, print_traceback=False):\n", + " long_running_test()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "Note that it is possible to nest multiple timeouts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "with ExpectTimeout(5):\n", + " with ExpectTimeout(3):\n", + " long_running_test()\n", + " long_running_test()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "That's it, folks – enjoy!"
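, + "\n", + "As promised above, here is a sketch of canceling a pending timeout within the `with` block; `do_something_fast()` stands in for your own code:\n", + "\n", + "```python\n", + "with ExpectTimeout(5) as t:\n", + "    do_something_fast()\n", + "    t.cancel()  # finished in time; stop the timeout clock\n", + "```"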
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synopsis\n", + "\n", + "The `ExpectError` class allows you to catch and report exceptions, yet resume execution. This is useful in notebooks, as they would normally interrupt execution as soon as an exception is raised. Its typical usage is in conjunction with a `with` clause:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with ExpectError():\n", + " x = 1 / 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `ExpectTimeout` class allows you to interrupt execution after the specified time. This is useful for interrupting code that might otherwise run forever." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with ExpectTimeout(5):\n", + " long_running_test()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The exception and the associated traceback are printed as error messages. If you do not want that, \n", + "use these keyword options:\n", + "\n", + "* `print_traceback` (default True) can be set to `False` to avoid the traceback being printed\n", + "* `mute` (default False) can be set to `True` to completely avoid any output." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": true, + "run_control": { + "read_only": false + } + }, + "source": [ + "## Lessons Learned\n", + "\n", + "* With the `ExpectError` class, it is very easy to handle errors without interrupting notebook execution." + ] + } + ], + "metadata": { + "ipub": { + "bibliography": "fuzzingbook.bib", + "toc": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "toc-autonumbering": false + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/shared/Makefile b/notebooks/shared/Makefile new file mode 100644 index 000000000..99f8b45bd --- /dev/null +++ b/notebooks/shared/Makefile @@ -0,0 +1,1285 @@ +# Fuzzingbook/Debuggingbook Makefile + +# Get chapter files +CHAPTERS_MAKEFILE = Chapters.makefile +include $(CHAPTERS_MAKEFILE) + +# All source notebooks +SOURCE_FILES = \ + $(FRONTMATTER) \ + $(CHAPTERS) \ + $(APPENDICES) \ + $(EXTRAS) + +# The bibliography file +BIB = fuzzingbook.bib + +# The utilities folder +UTILS = bookutils + +# The utilities in $(UTILS) +UTILITY_FILES = \ + __init__.py \ + PrettyTable.py \ + README.md \ + export_notebook_code.py \ + import_notebooks.py \ + set_fixed_seed.py + +# Where the notebooks are +NOTEBOOKS = notebooks + +# Derived versions including HTML, SVG, and text output cells (for Web) +FULL_NOTEBOOKS = full_notebooks + +# Derived versions including PNG and text output cells, +# but without excursions (for LaTeX and PDF) +RENDERED_NOTEBOOKS = rendered + +# Git repo +GITHUB_REPO = https://github.com/uds-se/$(PROJECT)/ +BINDER_URL = 
https://mybinder.org/v2/gh/uds-se/$(PROJECT)/master?filepath=docs/beta/notebooks/00_Table_of_Contents.ipynb +PROJECT_URL = https://beta.$(PROJECT).org + +# Sources in the notebooks folder +SOURCES = $(SOURCE_FILES:%=$(NOTEBOOKS)/%) +CHAPTER_SOURCES = $(CHAPTERS:%=$(NOTEBOOKS)/%) +ALL_CHAPTER_SOURCES = $(CHAPTERS:%=$(NOTEBOOKS)/%) +PUBLIC_SOURCES = $(PUBLIC_CHAPTERS:%=$(NOTEBOOKS)/%) +READY_SOURCES = $(READY_CHAPTERS:%=$(NOTEBOOKS)/%) +TODO_SOURCES = $(TODO_CHAPTERS:%=$(NOTEBOOKS)/%) +NEW_SOURCES = $(NEW_CHAPTERS:%=$(NOTEBOOKS)/%) +APPENDICES_SOURCES = $(APPENDICES:%=$(NOTEBOOKS)/%) + +# Where to place the pdf, html, slides +PDF_TARGET = pdf/ +NBPDF_TARGET = nbpdf/ +HTML_TARGET = html/ +SLIDES_TARGET = slides/ +CODE_TARGET = code/ +MYPY_TARGET = mypy/ +MARKDOWN_TARGET = markdown/ +WORD_TARGET = word/ +EPUB_TARGET = epub/ +DEPEND_TARGET = .depend/ +DOCS_TARGET = docs/ + +# If BETA=y, we create files in the "beta" subdir. Use 'make docs-beta', 'make html-beta' to invoke +ifdef BETA +DOCS_TARGET := docs/beta/ +HTML_TARGET := beta/$(HTML_TARGET) +SLIDES_TARGET := beta/$(SLIDES_TARGET) +CODE_TARGET := beta/$(CODE_TARGET) +BETA_FLAG = --include-ready --include-todo +endif +ifndef BETA +# Avoid warning: undefined variable `BETA_FLAG' +BETA_FLAG = +endif + +# Files to appear in the table of contents +ifndef BETA +CHAPTER_SOURCES := $(PUBLIC_CHAPTERS:%=$(NOTEBOOKS)/%) +endif +ifdef BETA +PUBLIC_CHAPTERS := $(CHAPTERS) +endif +TOC_CHAPTERS := $(PUBLIC_CHAPTERS) +TOC_APPENDICES = $(APPENDICES) + +# Files to appear on the Web page +DOCS = \ + $(FRONTMATTER:%.ipynb=%) \ + $(TOC_CHAPTERS:%.ipynb=%) \ + $(APPENDICES:%.ipynb=%) \ + $(EXTRAS:%.ipynb=%) + + +# Various derived files +TEXS = $(SOURCE_FILES:%.ipynb=$(PDF_TARGET)%.tex) +PDFS = $(SOURCE_FILES:%.ipynb=$(PDF_TARGET)%.pdf) +NBPDFS = $(SOURCE_FILES:%.ipynb=$(NBPDF_TARGET)%.pdf) +HTMLS = $(SOURCE_FILES:%.ipynb=$(HTML_TARGET)%.html) +SLIDES = $(SOURCE_FILES:%.ipynb=$(SLIDES_TARGET)%.slides.html) +PYS = $(SOURCE_FILES:%.ipynb=$(CODE_TARGET)%.py) \ + $(CODE_TARGET)setup.py \ + $(CODE_TARGET)__init__.py +MYPYS = $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET)%.py) +WORDS = $(SOURCE_FILES:%.ipynb=$(WORD_TARGET)%.docx) +MARKDOWNS = $(SOURCE_FILES:%.ipynb=$(MARKDOWN_TARGET)%.md) +EPUBS = $(SOURCE_FILES:%.ipynb=$(EPUB_TARGET)%.epub) +FULLS = $(FULL_NOTEBOOKS)/$(UTILS) \ + $(UTILITY_FILES:%=$(FULL_NOTEBOOKS)/$(UTILS)/%) \ + $(SOURCE_FILES:%.ipynb=$(FULL_NOTEBOOKS)/%.ipynb) +RENDERS = $(SOURCE_FILES:%.ipynb=$(RENDERED_NOTEBOOKS)/%.ipynb) + +DEPENDS = $(SOURCE_FILES:%.ipynb=$(DEPEND_TARGET)%.makefile) + +CHAPTER_PYS = $(CHAPTERS:%.ipynb=$(CODE_TARGET)%.py) + +PDF_FILES = $(SOURCE_FILES:%.ipynb=$(PDF_TARGET)%_files) +NBPDF_FILES = $(SOURCE_FILES:%.ipynb=$(NBPDF_TARGET)%_files) +HTML_FILES = $(SOURCE_FILES:%.ipynb=$(HTML_TARGET)%_files) +SLIDES_FILES = $(SOURCE_FILES:%.ipynb=$(SLIDES_TARGET)%_files) + +SITEMAP_SVG = $(NOTEBOOKS)/PICS/Sitemap.svg + + +# Configuration +# The site +SITE = https://www.$(PROJECT).org + +# What we use for production: nbpublish (preferred), bookbook, or nbconvert +PUBLISH ?= nbpublish + +# What we use for LaTeX: latexmk (preferred), or pdflatex +LATEX ?= latexmk + +## Tools +# Python +PYTHON ?= python3 + +# Jupyter +JUPYTER ?= jupyter + +# The nbpublish tool (preferred; https://github.com/chrisjsewell/ipypublish) +# (see nbpublish -h for details) +NBPUBLISH ?= nbpublish +NBPUBLISH_OPTIONS ?= -log warning + +# The bookbook tool (okay for chapters and books; but no citations yet) +# https://github.com/takluyver/bookbook +BOOKBOOK_LATEX ?= $(PYTHON) 
-m bookbook.latex +BOOKBOOK_HTML ?= $(PYTHON) -m bookbook.html + +# The nbconvert alternative (okay for chapters; doesn't work for book; no citations) +NBCONVERT ?= $(JUPYTER) nbconvert +NBCONVERT_OPTIONS ?= --log-level=WARN + +# Notebook merger +NBMERGE = $(PYTHON) utils/nbmerge.py + +# LaTeX +PDFLATEX ?= pdflatex +XELATEX ?= xelatex +BIBTEX ?= bibtex +LATEXMK ?= latexmk +LATEXMK_OPTS ?= -xelatex -quiet -f -interaction=nonstopmode + +# Word +PANDOC ?= pandoc + +# Markdown (see https://github.com/aaren/notedown) +NOTEDOWN ?= notedown + +# Style checks +PYCODESTYLE ?= pycodestyle +PYCODESTYLE_CFG = code/pycodestyle.cfg + +AUTOPEP8 ?= autopep8 +AUTOPEP8_CFG = code/autopep8.cfg +AUTOPEP8_OPTIONS = --global-config $(AUTOPEP8_CFG) --aggressive --in-place +NBAUTOPEP8 = $(PYTHON) utils/nbautopep8.py + +# Program to open files after creating, say OPEN=open (default: ignore; "true" does nothing) +OPEN ?= true + +# Make directory +MKDIR = mkdir -p + +ifndef PUBLISH +# Determine publishing program +OUT := $(shell which $(NBPUBLISH) > /dev/null && echo yes) +ifeq ($(OUT),yes) +# We have nbpublish +PUBLISH = nbpublish +else +# Issue a warning message +OUT := $(shell $(NBPUBLISH) -h > /dev/null) +# We have nbconvert +PUBLISH = nbconvert +PUBLISH_PLUGINS = +endif +endif + +ifndef LATEX +# Determine publishing program +OUT := $(shell which $(LATEXMK) > /dev/null && echo yes) +ifeq ($(OUT),yes) +# We have latexmk +LATEX = $(LATEXMK) +else +# Issue a warning message +OUT := $(shell $(LATEXMK) -h > /dev/null) +# We have pdflatex +LATEX = $(PDFLATEX) +endif +endif + + +# Book base name +BOOK = $(PROJECT) + +ifeq ($(PUBLISH),bookbook) +# Use bookbook +CONVERT_TO_HTML = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to html --output-dir=$(HTML_TARGET) +CONVERT_TO_TEX = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to latex --template $(PROJECT).tplx --output-dir=$(PDF_TARGET) +BOOK_TEX = $(PDF_TARGET)$(BOOK).tex +BOOK_PDF = $(PDF_TARGET)$(BOOK).pdf +BOOK_HTML = $(HTML_TARGET)$(BOOK).html +BOOK_HTML_FILES = $(HTML_TARGET)$(BOOK)_files +BOOK_PDF_FILES = $(PDF_TARGET)$(BOOK)_files +PUBLISH_PLUGINS = +else +ifeq ($(PUBLISH),nbpublish) +# Use nbpublish +CONVERT_TO_HTML = $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f html_ipypublish_chapter --outpath $(HTML_TARGET) +CONVERT_TO_TEX = $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f latex_ipypublish_chapter --outpath $(PDF_TARGET) +# CONVERT_TO_SLIDES = $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f slides_ipypublish_all --outpath $(SLIDES_TARGET) +BOOK_TEX = $(PDF_TARGET)$(BOOK).tex +BOOK_PDF = $(PDF_TARGET)$(BOOK).pdf +BOOK_HTML = $(HTML_TARGET)$(BOOK).html +BOOK_HTML_FILES = $(HTML_TARGET)$(BOOK)_files +BOOK_PDF_FILES = $(PDF_TARGET)$(BOOK)_files +PUBLISH_PLUGINS = \ + ipypublish_plugins/html_ipypublish_chapter.py \ + ipypublish_plugins/latex_ipypublish_book.py \ + ipypublish_plugins/latex_ipypublish_chapter.py +else +# Use standard Jupyter tools +CONVERT_TO_HTML = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to html --output-dir=$(HTML_TARGET) +CONVERT_TO_TEX = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to latex --template $(PROJECT).tplx --output-dir=$(PDF_TARGET) +# CONVERT_TO_SLIDES = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to slides --output-dir=$(SLIDES_TARGET) +BOOK_TEX = +BOOK_PDF = +BOOK_HTML = +BOOK_HTML_FILES = +BOOK_PDF_FILES = +PUBLISH_PLUGINS = +endif +endif + +# For Python, we use our own script that takes care of distinguishing +# main (script) code from definitions to be imported +EXPORT_NOTEBOOK_CODE = $(NOTEBOOKS)/$(UTILS)/export_notebook_code.py +CONVERT_TO_PYTHON = $(PYTHON) $(EXPORT_NOTEBOOK_CODE) + +# This would 
be the Jupyter alternative +# CONVERT_TO_PYTHON = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to python --output-dir=$(CODE_TARGET) + +# For slides, we use the standard Jupyter tools +# Main reason: Jupyter has a neat interface to control slides/sub-slides/etc +CONVERT_TO_SLIDES = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to slides --output-dir=$(SLIDES_TARGET) +REVEAL_JS = $(SLIDES_TARGET)reveal.js + +# For Word .docx files, we start from the HTML version +CONVERT_TO_WORD = $(PANDOC) + +# For Markdown .md files, we use markdown +# Note: adding --run re-executes all code +# CONVERT_TO_MARKDOWN = $(NOTEDOWN) --to markdown +CONVERT_TO_MARKDOWN = $(NBCONVERT) $(NBCONVERT_OPTIONS) --to markdown --output-dir=$(MARKDOWN_TARGET) + +# Run +# fuzzingbook/WhenToStopFuzzing needs about 120 seconds to render +# debuggingbook/Tracing may need up to 10 minutes +EXECUTE_TIMEOUT ?= 140 +EXECUTE_OPTIONS ?= --ExecutePreprocessor.timeout=$(EXECUTE_TIMEOUT) +EXECUTE_NOTEBOOK = $(TIME) $(NBCONVERT) $(NBCONVERT_OPTIONS) $(EXECUTE_OPTIONS) --to notebook --execute --output-dir=$(FULL_NOTEBOOKS) + +# Render +RENDER_NOTEBOOK = RENDER_HTML=1 $(NBCONVERT) $(NBCONVERT_OPTIONS) $(EXECUTE_OPTIONS) --to notebook --execute --output-dir=$(RENDERED_NOTEBOOKS) + + +# Zip +ZIP ?= zip +ZIP_OPTIONS = -r + + +# Short targets +# Default target is to build everything needed for publishing, +# such that we can run "make -k" in a loop +.PHONY: chapters web default +web default: html code test-code test-imports test-packages test-types slides +chapters: html + +# The book is recreated after any change to any source +.PHONY: book all and more +book $(PROJECT): book-html book-pdf +all: chapters pdf code slides book +and more: word markdown epub + +# Individual targets +.PHONY: html pdf python code slides word doc docx md markdown epub +.PHONY: full-notebooks full fulls rendered-notebooks rendered renders book-pdf book-html +html: ipypublish-chapters $(HTMLS) +pdf: ipypublish-chapters $(PDFS) +nbpdf: ipypublish-chapters $(NBPDFS) +python code: $(PYS) +slides: $(SLIDES) +word doc docx: $(WORDS) +md markdown: $(MARKDOWNS) +epub: $(EPUBS) +full-notebooks full fulls: $(FULLS) +rendered-notebooks rendered renders: $(RENDERS) + +book-pdf $(PROJECT)-pdf: ipypublish-book $(BOOK_PDF) +book-html $(PROJECT)-html: ipypublish-book $(BOOK_HTML) + +.PHONY: ipypublish-book ipypublish-chapters +ifeq ($(PUBLISH),bookbook) +ipypublish-book: +ipypublish-chapters: +else +ifeq ($(PUBLISH),nbpublish) +ipypublish-book: +ipypublish-chapters: +else +ipypublish-book: + @echo "To create the book, you need the 'nbpublish' program." + @echo "This is part of the 'ipypublish' package" + @echo "at https://github.com/chrisjsewell/ipypublish" +ipypublish-chapters: + @echo "Warning: Using '$(NBCONVERT)' instead of '$(NBPUBLISH)'" + @echo "Documents will be created without citations and references" + @echo "Install the 'ipypublish' package" + @echo "from https://github.com/chrisjsewell/ipypublish" +endif +endif + +.PHONY: edit jupyter lab notebook +# Invoke notebook and editor: `make jupyter lab` +edit notebook: + $(JUPYTER) notebook + +lab: + $(JUPYTER) lab + +jupyter: + + +# Help +.PHONY: help +help: + @echo "Welcome to the '$(PROJECT)' Makefile!" 
+ @echo "" + @echo "* make chapters (default) -> HTML and code for all chapters (notebooks)" + @echo "* make (pdf|html|code|slides|word|markdown) -> given subcategory only" + @echo "* make book -> entire book in PDF and HTML" + @echo "* make all -> all inputs in all output formats" + @echo "* make reformat -> reformat notebook Python code according to PEP8 guidelines" + @echo "* make style -> style checker" + @echo "* make crossref -> cross reference checker" + @echo "* make stats -> report statistics" + @echo "* make clean -> delete all derived files" + @echo "" + @echo "Created files end here:" + @echo "* PDFs -> '$(PDF_TARGET)', HTML -> '$(HTML_TARGET)', Python code -> '$(CODE_TARGET)', Slides -> '$(SLIDES_TARGET)'" + @echo "* Web site files -> '$(DOCS_TARGET)'" + @echo "" + @echo "Publish:" + @echo "* make docs -> Create public version of current documents" + @echo "* make beta -> Create beta version of current documents" + @echo "* make publish-all -> Add docs to git, preparing for publication" + @echo "" + @echo "Settings:" + @echo "* Use make PUBLISH=(nbconvert|nbpublish|bookbook) to choose a converter" + @echo " (default: automatic)" + +# Run a notebook, (re)creating all output cells +ADD_METADATA = $(SHARED)utils/add_metadata.py +NBAUTOSLIDE = $(SHARED)utils/nbautoslide.py +NBSYNOPSIS = $(SHARED)utils/nbsynopsis.py +NBSHORTEN = $(SHARED)utils/nbshorten.py + +COMMIT_SYNOPSIS = -git commit -m "Update synopsis" $(NOTEBOOKS)/PICS/*synopsis* + +$(FULL_NOTEBOOKS)/%.ipynb: $(NOTEBOOKS)/%.ipynb $(DEPEND_TARGET)%.makefile $(ADD_METADATA) $(NBAUTOSLIDE) $(NBSYNOPSIS) + $(EXECUTE_NOTEBOOK) $< + $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $@ > $@~ && mv $@~ $@ + $(PYTHON) $(NBAUTOSLIDE) --in-place $@ + $(PYTHON) $(NBSYNOPSIS) --project $(PROJECT) --update $@ + $(COMMIT_SYNOPSIS) + +$(RENDERED_NOTEBOOKS)/%.ipynb: $(NOTEBOOKS)/%.ipynb $(DEPEND_TARGET)%.makefile $(ADD_METADATA) $(SHARED)$(NBAUTOSLIDE) $(SHARED)$(NBSYNOPSIS) $(SHARED)$(NBSHORTEN) $(NOTEBOOKS)/$(UTILS)/__init__.py + $(RENDER_NOTEBOOK) $< + $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $@ > $@~ && mv $@~ $@ + $(PYTHON) $(NBAUTOSLIDE) --in-place $@ + RENDER_HTML=1 $(PYTHON) $(NBSYNOPSIS) --project $(PROJECT) --update $@ + $(COMMIT_SYNOPSIS) + $(PYTHON) $(NBSHORTEN) --link-to "$(SITE)/html/" --in-place $@ + +$(FULL_NOTEBOOKS)/$(UTILS): + $(MKDIR) $(FULL_NOTEBOOKS)/$(UTILS) + +$(FULL_NOTEBOOKS)/$(UTILS)/%: $(NOTEBOOKS)/$(UTILS)/% + @test -d $(FULL_NOTEBOOKS)/$(UTILS) || \ + $(MKDIR) $(FULL_NOTEBOOKS)/$(UTILS) + cp -pr $< $@ + + + +# Conversion rules - chapters +ifeq ($(LATEX),pdflatex) +# Use PDFLaTeX +$(PDF_TARGET)%.pdf: $(PDF_TARGET)%.tex $(BIB) + @echo Running LaTeX... + @-test -L $(PDF_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(PDF_TARGET) + cd $(PDF_TARGET) && $(PDFLATEX) $* + -cd $(PDF_TARGET) && $(BIBTEX) $* + cd $(PDF_TARGET) && $(PDFLATEX) $* + cd $(PDF_TARGET) && $(PDFLATEX) $* + @cd $(PDF_TARGET) && $(RM) $*.aux $*.bbl $*.blg $*.log $*.out $*.toc $*.frm $*.lof $*.lot $*.fls + @cd $(PDF_TARGET) && $(RM) -r $*_files + @echo Created $@ + @$(OPEN) $@ +else +# Use LaTeXMK +$(PDF_TARGET)%.pdf: $(PDF_TARGET)%.tex $(BIB) + @echo Running LaTeXMK... 
+ @-test -L $(PDF_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(PDF_TARGET) + cd $(PDF_TARGET) && $(LATEXMK) $(LATEXMK_OPTS) $* + @cd $(PDF_TARGET) && $(RM) $*.aux $*.bbl $*.blg $*.log $*.out $*.toc $*.frm $*.lof $*.lot $*.fls $*.fdb_latexmk $*.xdv + @echo Created $@ + @$(OPEN) $@ +endif + +# Keep the .tex files +.PRECIOUS: $(PDF_TARGET)%.tex + +POST_TEX = utils/post_tex + +$(PDF_TARGET)%.tex: $(RENDERED_NOTEBOOKS)/%.ipynb $(BIB) $(PUBLISH_PLUGINS) $(SHARED)$(ADD_METADATA) $(SHARED)$(POST_TEX) + $(eval TMPDIR := $(shell mktemp -d)) + $(PYTHON) $(ADD_METADATA) --project $(PROJECT) --titlepage $< > $(TMPDIR)/$(notdir $<) + cp -pr $(NOTEBOOKS)/PICS $(BIB) $(TMPDIR) + $(CONVERT_TO_TEX) $(TMPDIR)/$(notdir $<) + $(POST_TEX) $@ > $@~ && mv $@~ $@ + @-$(RM) -fr $(TMPDIR) + @cd $(PDF_TARGET) && $(RM) $*.nbpub.log + + +POST_HTML_OPTIONS = $(BETA_FLAG) \ + --project="$(PROJECT)" \ + --title="$(BOOKTITLE)" \ + --authors="$(AUTHORS)" \ + --twitter="$(TWITTER)" \ + --all-chapters="$(ALL_CHAPTER_SOURCES) $(APPENDICES_SOURCES)" \ + --public-chapters="$(CHAPTER_SOURCES) $(APPENDICES_SOURCES)" \ + --ready-chapters="$(READY_SOURCES)" \ + --todo-chapters="$(TODO_SOURCES)" \ + --new-chapters="$(NEW_SOURCES)" + +HTML_DEPS = $(BIB) $(PUBLISH_PLUGINS) $(SHARED)utils/post_html.py $(CHAPTERS_MAKEFILE) $(BIBCHECK) + +# Check bib +BIBER = biber +BIBCHECK = .$(BIB).ascii .$(BIB).python .$(BIB).biber +checkbib check-bib: $(BIBCHECK) + @echo "Check completed; $(BIB) is ok" + +check-bib-ascii: .$(BIB).ascii +.$(BIB).ascii: $(BIB) + @echo "Checking $(BIB) for 7-bit ASCII encoding" + @if grep -Hn '[^[:print:]]' fuzzingbook.bib; then false; fi + @touch $@ + +check-bib-python: .$(BIB).python +.$(BIB).python: $(BIB) + @echo "Checking $(BIB) for Python usage with bibtexparser" + @$(PYTHON) -W error -c 'import bibtexparser; fd = open("$(BIB)"); bibtexparser.load(fd); fd.close()' + @touch $@ + +check-bib-biber: .$(BIB).biber +.$(BIB).biber: $(BIB) + @echo "Checking $(BIB) for LaTeX usage with Biber" + @$(BIBER) --tool --validate-datamodel --quiet $(BIB) + @$(RM) fuzzingbook_bibertool.bib fuzzingbook.bib.blg + @touch .$(BIB).biber + +.PHONY: checkbib check-bib check-bib-ascii check-bib-python check-bib-biber + + +# index.html comes with relative links (html/) such that the beta version gets the beta menu +$(DOCS_TARGET)index.html: \ + $(FULL_NOTEBOOKS)/index.ipynb $(HTML_DEPS) + @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) + @test -d $(HTML_TARGET) || $(MKDIR) $(HTML_TARGET) + $(CONVERT_TO_HTML) $< + mv $(HTML_TARGET)index.html $@ + @cd $(HTML_TARGET) && $(RM) -r index.nbpub.log index_files + $(PYTHON) $(SHARED)utils/post_html.py --menu-prefix=html/ --home $(POST_HTML_OPTIONS)$(HOME_POST_HTML_OPTIONS) $@ + @$(OPEN) $@ + +# 404.html comes with absolute links (/html/) such that it works anywhare +# https://help.github.com/articles/creating-a-custom-404-page-for-your-github-pages-site/ +$(DOCS_TARGET)404.html: $(FULL_NOTEBOOKS)/404.ipynb $(HTML_DEPS) + @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) + @test -d $(HTML_TARGET) || $(MKDIR) $(HTML_TARGET) + $(CONVERT_TO_HTML) $< + mv $(HTML_TARGET)404.html $@ + @cd $(HTML_TARGET) && $(RM) -r 404.nbpub.log 404_files + $(PYTHON) $(SHARED)utils/post_html.py --menu-prefix=/html/ --home $(POST_HTML_OPTIONS) $@ + (echo '---'; echo 'permalink: /404.html'; echo '---'; cat $@) > $@~ && mv $@~ $@ + @$(OPEN) $@ + +$(DOCS_TARGET)html/00_Index.html: $(DOCS_TARGET)notebooks/00_Index.ipynb $(HTML_DEPS) + $(CONVERT_TO_HTML) $< + @cd $(HTML_TARGET) && $(RM) -r 00_Index.nbpub.log 00_Index_files + 
@cd $(DOCS_TARGET)html && $(RM) -r 00_Index.nbpub.log 00_Index_files + mv $(HTML_TARGET)00_Index.html $@ + $(PYTHON) $(SHARED)utils/post_html.py $(POST_HTML_OPTIONS) $@ + +$(DOCS_TARGET)html/00_Table_of_Contents.html: $(DOCS_TARGET)notebooks/00_Table_of_Contents.ipynb $(SITEMAP_SVG) + $(CONVERT_TO_HTML) $< + @cd $(HTML_TARGET) && $(RM) -r 00_Table_of_Contents.nbpub.log 00_Table_of_Contents_files + @cd $(DOCS_TARGET)html && $(RM) -r 00_Table_of_Contents.nbpub.log 00_Table_of_Contents_files + mv $(HTML_TARGET)00_Table_of_Contents.html $@ + $(PYTHON) $(SHARED)utils/post_html.py $(POST_HTML_OPTIONS) $@ + @$(OPEN) $@ + +$(HTML_TARGET)%.html: $(FULL_NOTEBOOKS)/%.ipynb $(HTML_DEPS) + @test -d $(HTML_TARGET) || $(MKDIR) $(HTML_TARGET) + $(CONVERT_TO_HTML) $< + @cd $(HTML_TARGET) && $(RM) $*.nbpub.log $*_files/$(BIB) + $(PYTHON) $(SHARED)utils/post_html.py $(POST_HTML_OPTIONS) $@ + @-test -L $(HTML_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(HTML_TARGET) + @$(OPEN) $@ + +$(SLIDES_TARGET)%.slides.html: $(FULL_NOTEBOOKS)/%.ipynb $(BIB) $(NBSHORTEN) + @test -d $(SLIDES_TARGET) || $(MKDIR) $(SLIDES_TARGET) + $(eval TMPDIR := $(shell mktemp -d)) + sed 's/\.ipynb)/\.slides\.html)/g' $< > $(TMPDIR)/$(notdir $<) + $(PYTHON) $(NBSHORTEN) --skip-slides --in-place $(TMPDIR)/$(notdir $<) + $(CONVERT_TO_SLIDES) $(TMPDIR)/$(notdir $<) + @cd $(SLIDES_TARGET) && $(RM) $*.nbpub.log $*_files/$(BIB) + @-test -L $(HTML_TARGET)PICS || ln -s ../$(NOTEBOOKS)/PICS $(HTML_TARGET) + @-$(RM) -fr $(TMPDIR) + @$(OPEN) $@ + + +# Rules for beta targets +.FORCE: +ifndef BETA +beta/%: .FORCE + @$(MAKE) BETA=beta $(@:beta/=) + +$(DOCS_TARGET)beta/%: .FORCE + @$(MAKE) BETA=beta $(@:beta/=) + +%-beta: .FORCE + @$(MAKE) BETA=beta $(@:-beta=) + +%-all: % %-beta + @true + +.PHONY: beta +beta: default-beta +else: +beta: +endif + + +# Reconstructing the reveal.js dir +.PHONY: reveal.js +$(REVEAL_JS) reveal.js: .FORCE + @-test -d "$@" || (cd $(SLIDES_TARGET); \ + git submodule add https://github.com/hakimel/reveal.js.git) + @git submodule update --remote + +$(CODE_TARGET)setup.py: $(CODE_TARGET)setup.py.in + cat $< > $@ + chmod +x $@ + +$(CODE_TARGET)__init__.py: $(CODE_TARGET)__init__.py.in $(CHAPTERS_MAKEFILE) + cat $< > $@ + (for module in $(IMPORTS); do echo from . 
import $$module; done) | grep -v '^.*[0-9][0-9]_.*' >> $@ + chmod +x $@ + +# For code, we comment out fuzzingbook/debuggingbook imports, +# ensuring we import a .py and not the .ipynb file +$(CODE_TARGET)%.py: $(FULL_NOTEBOOKS)/%.ipynb $(EXPORT_NOTEBOOK_CODE) + @test -d $(CODE_TARGET) || $(MKDIR) $(CODE_TARGET) + $(CONVERT_TO_PYTHON) --project $(PROJECT) $< > $@~ && mv $@~ $@ + # $(AUTOPEP8) $(AUTOPEP8_OPTIONS) $@ + -chmod +x $@ + +$(MYPY_TARGET)%.py: $(NOTEBOOKS)/%.ipynb $(EXPORT_NOTEBOOK_CODE) + @test -d $(MYPY_TARGET) || $(MKDIR) $(MYPY_TARGET) + $(CONVERT_TO_PYTHON) --project $(PROJECT) --mypy $< > $@~ && mv $@~ $@ + +# Markdown +$(MARKDOWN_TARGET)%.md: $(RENDERED_NOTEBOOKS)/%.ipynb $(BIB) + $(RM) -r $(MARKDOWN_TARGET)$(basename $(notdir $<)).md $(MARKDOWN_TARGET)$(basename $(notdir $<))_files + $(CONVERT_TO_MARKDOWN) $< + +# For word, we convert from the HTML file +$(WORD_TARGET)%.docx: $(HTML_TARGET)%.html $(WORD_TARGET)pandoc.css + $(PANDOC) --css=$(WORD_TARGET)pandoc.css $< -o $@ + +# Epub comes from the markdown file +$(EPUB_TARGET)%.epub: $(MARKDOWN_TARGET)%.md + cd $(MARKDOWN_TARGET); $(PANDOC) -o ../$@ ../$< + + +# NBPDF files - generated from HMTL, with embedded notebooks +# See instructions at https://github.com/betatim/notebook-as-pdf +HTMLTONBPDF = $(SHARED)utils/htmltonbpdf.py + +$(NBPDF_TARGET)%.pdf: $(HTML_TARGET)/%.html $(RENDERED_NOTEBOOKS)/%.ipynb $(HTMLTONBPDF) $(HTML_TARGET)custom.css + @test -d $(NBPDF_TARGET) || $(MKDIR) $(NBPDF_TARGET) + $(PYTHON) $(HTMLTONBPDF) --attach --fix-html-links $${PWD}/$(HTML_TARGET)$(basename $(notdir $<)).html $(RENDERED_NOTEBOOKS)/$(basename $(notdir $<)).ipynb $@ + sed "s!$(HTML_TARGET)!$(NBPDF_TARGET)!g" $@ > $@~ && mv $@~ $@ + + +# Conversion rules - entire book +# We create a fuzzingbook/ or debuggingbook/ folder +# with the chapters ordered by number, +# and let the fuzzingbook converters run on this +ifeq ($(PUBLISH),nbpublish) +# With nbpublish +$(PDF_TARGET)$(BOOK).tex: $(RENDERS) $(BIB) $(PUBLISH_PLUGINS) $(CHAPTERS_MAKEFILE) + -$(RM) -r $(BOOK) + $(MKDIR) $(BOOK) + chapter=0; \ + for file in $(SOURCE_FILES); do \ + chnum=$$(printf "%02d" $$chapter); \ + ln -s ../$(RENDERED_NOTEBOOKS)/$$file $(BOOK)/$$(echo $$file | sed 's/.*/Ch'$${chnum}'_&/g'); \ + chapter=$$(expr $$chapter + 1); \ + done + ln -s ../$(BIB) $(BOOK) + $(NBPUBLISH) $(NBPUBLISH_OPTIONS) -f latex_ipypublish_book --outpath $(PDF_TARGET) $(BOOK) + $(POST_TEX) $@ > $@~ && mv $@~ $@ + $(RM) -r $(BOOK) + cd $(PDF_TARGET) && $(RM) $(BOOK).nbpub.log + @echo Created $@ + +$(HTML_TARGET)$(BOOK).html: $(FULLS) $(BIB) $(SHARED)utils/post_html.py + -$(RM) -r $(BOOK) + $(MKDIR) $(BOOK) + chapter=0; \ + for file in $(SOURCE_FILES); do \ + chnum=$$(printf "%02d" $$chapter); \ + ln -s ../$(FULL_NOTEBOOKS)/$$file $(BOOK)/$$(echo $$file | sed 's/.*/Ch'$${chnum}'_&/g'); \ + chapter=$$(expr $$chapter + 1); \ + done + ln -s ../$(BIB) $(BOOK) + $(CONVERT_TO_HTML) $(BOOK) + $(PYTHON) $(SHARED)utils/nbmerge.py $(BOOK)/Ch*.ipynb > notebooks/$(BOOK).ipynb + $(PYTHON) $(SHARED)utils/post_html.py $(BETA_FLAG) $(POST_HTML_OPTIONS) $@ + $(RM) -r $(BOOK) notebooks/$(BOOK).ipynb + cd $(HTML_TARGET) && $(RM) $(BOOK).nbpub.log $(BOOK)_files/$(BIB) + @echo Created $@ +else +# With bookbook +$(PDF_TARGET)$(BOOK).tex: $(RENDERS) $(BIB) $(PUBLISH_PLUGINS) $(CHAPTERS_MAKEFILE) + -$(RM) -r $(BOOK) + $(MKDIR) $(BOOK) + chapter=0; \ + for file in $(SOURCE_FILES); do \ + chnum=$$(printf "%02d" $$chapter); \ + ln -s ../$(RENDERED_NOTEBOOKS)/$$file book/$$(echo $$file | sed 's/.*/'$${chnum}'-&/g'); \ + 
chapter=$$(expr $$chapter + 1); \ + done + cd book; $(BOOKBOOK_LATEX) + mv book/combined.tex $@ + $(POST_TEX) $@ > $@~ && mv $@~ $@ + $(RM) -r book + @echo Created $@ + +$(HTML_TARGET)book.html: $(FULLS) $(BIB) $(PUBLISH_PLUGINS) + -$(RM) -r book + $(MKDIR) book + for file in $(SOURCE_FILES); do \ + ln -s ../$(FULL_NOTEBOOKS)/$$file book/$$(echo $$file | sed 's/[^-0-9]*\([-0-9][0-9]*\)_\(.*\)/\1-\2/g'); \ + done + cd book; $(BOOKBOOK_HTML) + mv book/html/index.html $@ + mv book/html/*.html $(HTML_TARGET) + $(RM) -r book + @echo Created $@ +endif + + +## Some checks + +# Style checks +.PHONY: style check-style checkstyle +style check-style checkstyle: $(PYS) $(PYCODESTYLE_CFG) + $(PYCODESTYLE) --config $(PYCODESTYLE_CFG) $(PYS) + @echo "All style checks passed." + +# Automatic formatting +.PHONY: autopep8 reformat +autopep8 reformat: $(PYCODESTYLE_CFG) + $(NBAUTOPEP8) --split-cells --jobs -1 $(AUTOPEP8_OPTIONS) $(SOURCES) + @echo "Code reformatting complete. Use 'make full' to re-execute and test notebooks." + + +# List of Cross References +.PHONY: check-crossref crossref xref +check-crossref crossref xref: $(SOURCES) + @echo "Referenced notebooks (* = missing)" + @files=$$(grep '\.ipynb)' $(SOURCES) | sed 's/.*[(]\([a-zA-Z0-9_][a-zA-Z0-9_-]*\.ipynb\)[)].*/\1/' | grep -v http | sort | uniq); \ + for file in $$files; do \ + if [ -f $(NOTEBOOKS)/$$file ]; then \ + echo ' ' $$file; \ + else \ + echo '* ' $$file "- in" $$(cd $(NOTEBOOKS); grep -l $$file $(SOURCE_FILES)); \ + fi \ + done + + +# Stats +.PHONY: stats +stats: $(SOURCES) + @cd $(NOTEBOOKS); ../$(SHARED)utils/nbstats.py $(SOURCE_FILES) + +# Run all code. This should produce no failures. +PY_SUCCESS_MAGIC = "--- Code check passed ---" +PYS_OUT = $(SOURCE_FILES:%.ipynb=$(CODE_TARGET).%.py.out) +$(CODE_TARGET).%.py.out: $(CODE_TARGET)%.py + @echo Running $<... + @if $(PYTHON) -W error $< > $@ 2>&1; then \ + echo $(PY_SUCCESS_MAGIC) >> $@; \ + exit 0; \ + else \ + echo "Error while running $<" >> $@; \ + tail $@; \ + touch -r $< $@; \ + touch -A -010000 $@; \ + exit 1; \ + fi + +# No need to check if Tracking.py works; it's not run by users anyway +$(CODE_TARGET).Tracking.py.out: $(CODE_TARGET)Tracking.py + @echo Skipping $<... + @echo $(PY_SUCCESS_MAGIC) > $@ + +.PHONY: test-code +test-code: code $(PYS_OUT) + +.PHONY: check-code +check-code: test-code + @files_with_errors=$$(grep --files-without-match -- $(PY_SUCCESS_MAGIC) $(PYS_OUT)); \ + if [ -z "$$files_with_errors" ]; then \ + echo "All code checks passed."; \ + else \ + echo "Check these files for errors: $$files_with_errors"; \ + exit 1; \ + fi + +# Import all code. This should produce no output (or error messages). +IMPORTS = $(subst .ipynb,,$(CHAPTERS) $(APPENDICES)) +IMPORTS_OUT = $(CODE_TARGET).import_all.py.out + +.PHONY: test-import test-imports +test-import test-imports: code $(IMPORTS_OUT) + +.PHONY: check-import check-imports +check-import check-imports: test-imports + @echo "All import checks passed." + +$(IMPORTS_OUT): $(PYS) + @echo "#!/usr/bin/env $(PYTHON)" > import_all.py + @(for module in $(IMPORTS); do echo import code.$$module; done) | grep -v '^.*[0-9][0-9]_.*' >> import_all.py + $(PYTHON) import_all.py 2>&1 | tee $@ + @$(RM) import_all.py + @test ! 
-s $@ + +# Same as above, but using Python standard packages only; import should work too +check-standard-imports: code + # PYTHONPATH= $(MAKE) check-imports + +PACKAGES_OUT = $(CODE_TARGET).import_packages.py.out +.PHONY: test-packages +test-packages: $(PACKAGES_OUT) + +check-package check-packages: test-packages + @echo "Package check passed." + +$(PACKAGES_OUT): $(PYS) + @echo "#!/usr/bin/env $(PYTHON)" > import_packages.py + @(for module in $(IMPORTS); do echo import code.$$module; done) | grep -v '^import code.[0-9][0-9]' >> import_packages.py + $(PYTHON) import_packages.py 2>&1 | tee $@ + @$(RM) import_packages.py + @test ! -s $@ + + +# Static type checking +MYPY = mypy +# MYPYS = $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET)%.py) +MYPYS_OUT = $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET).%.py.out) +$(MYPY_TARGET).%.py.out: $(MYPY_TARGET)%.py $(MYPY_TARGET)/mypy.ini + @echo Type-checking $<... + @if $(MYPY) --config-file $(MYPY_TARGET)/mypy.ini $< > $@ 2>&1; then \ + echo $(PY_SUCCESS_MAGIC) >> $@; \ + exit 0; \ + else \ + echo "Error type checking $<" >> $@; \ + tail $@; \ + touch -r $< $@; \ + touch -A -010000 $@; \ + exit 1; \ + fi + +UTILS_MYPY_OUT = $(MYPY_TARGET).$(UTILS).py.out +$(UTILS_MYPY_OUT): $(UTILITY_FILES:%=$(NOTEBOOKS)/$(UTILS)/%) + @echo Type-checking $(NOTEBOOKS)/$(UTILS)... + @if $(MYPY) --config-file $(MYPY_TARGET)/mypy.ini $(NOTEBOOKS)/$(UTILS) > $@ 2>&1; then \ + echo $(PY_SUCCESS_MAGIC) >> $@; \ + exit 0; \ + else \ + echo "Error type checking $<" >> $@; \ + tail $@; \ + touch -r $< $@; \ + touch -A -010000 $@; \ + exit 1; \ + fi + +test-types: $(SOURCE_FILES:%.ipynb=$(MYPY_TARGET)%.py) \ + $(UTILS_MYPY_OUT) $(MYPYS_OUT) + +check-types: test-types + @files_with_errors=$$(grep --files-without-match -- $(PY_SUCCESS_MAGIC) $(MYPYS_OUT) $(UTILS_MYPY_OUT)); \ + if [ -z "$$files_with_errors" ]; then \ + echo "All type checks passed."; \ + else \ + echo "Check these files for errors: $$files_with_errors"; \ + exit 1; \ + fi + +.PHONY: run +run: check-imports check-standard-imports check-package check-types check-code + +# Todo checks +check-todo todo: + @grep '\\todo' $(ALL_CHAPTER_SOURCES); \ + if [ $$? = 0 ]; then exit 1; else \ + echo "No todos in $(PUBLIC_CHAPTERS:%.ipynb=%) $(READY_CHAPTERS:%.ipynb=%)"; exit 0; fi + +# Spell checks +NBSPELLCHECK = $(SHARED)utils/nbspellcheck.py +.PHONY: spell spellcheck check-spell +spell spellcheck check-spell: + $(NBSPELLCHECK) $(SOURCES) + + +# All checks +.PHONY: check check-all +check check-all: check-import check-package check-types check-code check-style check-crossref check-todo + +# Add notebook metadata (add table of contents, bib reference, etc.) +.PHONY: metadata +metadata: $(ADD_METADATA) + @for notebook in $(SOURCES); do \ + echo "Adding metadata to $$notebook...\c"; \ + $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $$notebook > $$notebook~ || exit 1; \ + if diff $$notebook $$notebook~; then \ + echo "unchanged."; \ + else \ + mv $$notebook~ $$notebook; \ + echo "done."; \ + fi; \ + $(RM) $$notebook~; \ + done + + +## Publishing +.PHONY: docs +docs: publish-notebooks publish-index publish-html publish-code publish-dist \ + publish-slides publish-pics \ + $(DOCS_TARGET)index.html $(DOCS_TARGET)404.html README.md binder/postBuild + @echo "Now use 'make publish-all' to commit changes to docs." 
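+
+# A minimal sketch of the full publishing flow implied by the targets above
+# and below (a hypothetical session; assumes a configured git remote and an
+# existing mybinder setup):
+#
+#   make docs          # rebuild HTML, code, slides, and notebooks under $(DOCS_TARGET)
+#   make publish-all   # commit public and beta docs to git (publish + publish-beta, via the %-all rule)
+#   make push          # git push, then open the binder and project URLs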
+
+# github does not like script tags;
+# links to notebooks need to get adapted
+README.md: $(MARKDOWN_TARGET)index.md Makefile
+ sed 's!<script.*</script>!!g' $< | \
+ sed 's!(\([_a-zA-Z0-9]*\).ipynb)!($(SITE)/html/\1.html)!g' > $@
+
+.PHONY: publish
+publish: run quick-publish
+quick-publish: docs
+ git add $(DOCS_TARGET)* binder/postBuild README.md \
+ $(NOTEBOOKS)/PICS/*-synopsis-*
+ -git status
+ -git commit -m "Doc update"
+ @echo "Now use 'make push' to place docs on website and trigger a mybinder update"
+
+# Add/update HTML code in Web pages
+.PHONY: publish-html publish-html-setup
+publish-html: html publish-html-setup \
+ $(DOCS_TARGET)html/00_Index.html \
+ $(DOCS_TARGET)html/00_Table_of_Contents.html \
+ $(DOCS_TARGET)html/custom.css \
+ $(DOCS_TARGET)html/favicon \
+ $(DOCS:%=$(DOCS_TARGET)html/%.html) \
+ $(DOCS:%=$(DOCS_TARGET)html/%_files)
+
+publish-html-setup:
+ @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET)
+ @test -d $(DOCS_TARGET)html || $(MKDIR) $(DOCS_TARGET)html
+
+$(DOCS_TARGET)html/%: $(HTML_TARGET)%
+ $(RM) -r $@
+ cp -pr $< $@
+
+# Add/update Python code on Web pages
+.PHONY: publish-code publish-code-setup
+publish-code: code publish-code-setup \
+ $(DOCS_TARGET)code/LICENSE.md \
+ $(DOCS_TARGET)code/README.md \
+ $(DOCS_TARGET)code/setup.py \
+ $(DOCS_TARGET)code/__init__.py \
+ $(UTILITY_FILES:%=$(DOCS_TARGET)code/$(UTILS)/%) \
+ $(PUBLIC_CHAPTERS:%.ipynb=$(DOCS_TARGET)code/%.py) \
+ $(APPENDICES:%.ipynb=$(DOCS_TARGET)code/%.py)
+
+publish-code-setup:
+ @test -d $(DOCS_TARGET) \
+ || $(MKDIR) $(DOCS_TARGET)
+ @test -d $(DOCS_TARGET)code \
+ || $(MKDIR) $(DOCS_TARGET)code
+ @test -d $(DOCS_TARGET)code/$(UTILS) \
+ || $(MKDIR) $(DOCS_TARGET)code/$(UTILS)
+
+$(DOCS_TARGET)code/%: $(CODE_TARGET)%
+ cp -pr $< $@
+
+.PHONY: dist publish-dist
+dist publish-dist: check-import check-package check-code publish-code toc \
+ $(DOCS_TARGET)dist/$(PROJECT)-code.zip \
+ $(DOCS_TARGET)dist/$(PROJECT)-notebooks.zip
+
+DIST_CODE_FILES = \
+ $(DOCS_TARGET)code/README.md \
+ $(DOCS_TARGET)code/LICENSE.md \
+ $(DOCS_TARGET)code/setup.py \
+ $(DOCS_TARGET)code/__init__.py
+
+check-install:
+ $(eval TMPDIR := $(shell mktemp -d))
+ @cd $(TMPDIR); \
+ $(PYTHON) -c 'import $(PROJECT)' 2> /dev/null; \
+ if [ $$? = 0 ]; then \
+ echo "Error: Installed $(PROJECT) package conflicts with package creation" >&2; \
+ echo "Please uninstall it; e.g. with 'pip uninstall $(PROJECT)'."
>&2; \ + exit 1; \ + else \ + exit 0; \ + fi + +clean-dist: + $(RM) -r code/__pycache__ + $(RM) -r code/$(UTILS)/__pycache__ + $(RM) -r $(DOCS_TARGET)notebooks/$(PROJECT)/__pycache__ + $(RM) -r $(DOCS_TARGET)notebooks/$(UTILS)/__pycache__ + $(RM) -r $(DOCS_TARGET)code/$(UTILS)/__pycache__ + $(RM) -r $(DOCS_TARGET)notebooks/.ipynb_checkpoints + +$(DOCS_TARGET)dist/$(PROJECT)-code.zip: \ + $(PYS) $(DIST_CODE_FILES) $(CHAPTERS_MAKEFILE) \ + check-install clean-dist + @-mkdir $(DOCS_TARGET)dist + $(RM) -r $(DOCS_TARGET)dist/* + $(RM) -r $(DOCS_TARGET)$(PROJECT) + mkdir $(DOCS_TARGET)$(PROJECT) + ln -s ../code $(DOCS_TARGET)$(PROJECT)/$(PROJECT) + mv $(DOCS_TARGET)$(PROJECT)/$(PROJECT)/setup.py $(DOCS_TARGET)$(PROJECT) + mv $(DOCS_TARGET)$(PROJECT)/$(PROJECT)/README.md $(DOCS_TARGET)$(PROJECT) + cd $(DOCS_TARGET)$(PROJECT); PYTHONPATH= $(PYTHON) ./setup.py sdist + mv $(DOCS_TARGET)$(PROJECT)/dist/* $(DOCS_TARGET)dist + # mv $(DOCS_TARGET)$(PROJECT)/*.egg-info $(DOCS_TARGET)dist + $(RM) -r $(DOCS_TARGET)$(PROJECT)/*.egg-info + $(RM) -r $(DOCS_TARGET)$(PROJECT)/dist $(DOCS_TARGET)$(PROJECT)/build + cd $(DOCS_TARGET); $(ZIP) $(ZIP_OPTIONS) $(PROJECT)-code.zip $(PROJECT) + mv $(DOCS_TARGET)$(PROJECT)-code.zip $(DOCS_TARGET)dist + $(RM) -r $(DOCS_TARGET)$(PROJECT) $(DOCS_TARGET)code/$(PROJECT) + $(RM) -r $(DOCS_TARGET)code/dist $(DOCS_TARGET)code/*.egg-info + @echo "Created code distribution files in $(DOCS_TARGET)dist" + +$(DOCS_TARGET)dist/$(PROJECT)-notebooks.zip: $(FULLS) $(CHAPTERS_MAKEFILE) \ + clean-dist + cd $(DOCS_TARGET); ln -s notebooks $(PROJECT)-notebooks + cd $(DOCS_TARGET); \ + $(ZIP) $(ZIP_OPTIONS) $(PROJECT)-notebooks.zip $(PROJECT)-notebooks + $(RM) $(DOCS_TARGET)/$(PROJECT)-notebooks + cd $(DOCS_TARGET); \ + for file in $(EXTRAS); do \ + $(ZIP) $(PROJECT)-notebooks.zip -d $(PROJECT)-notebooks/$$file; \ + done + mv $(DOCS_TARGET)$(PROJECT)-notebooks.zip $@ + @echo "Created notebook distribution files in $(DOCS_TARGET)dist" + + +# Add/update slides on Web pages +.PHONY: publish-slides publish-slides-setup +publish-slides: slides publish-slides-setup \ + $(PUBLIC_CHAPTERS:%.ipynb=$(DOCS_TARGET)slides/%.slides.html) \ + $(APPENDICES:%.ipynb=$(DOCS_TARGET)slides/%.slides.html) \ + $(REVEAL_JS) $(DOCS_TARGET)slides/reveal.js + @-rm -fr $(DOCS_TARGET)slides/.git + +publish-slides-setup: + @test -d $(DOCS_TARGET) || $(MKDIR) $(DOCS_TARGET) + @test -d $(DOCS_TARGET)slides || $(MKDIR) $(DOCS_TARGET)slides + +$(DOCS_TARGET)slides/%: $(SLIDES_TARGET)% + -rm -fr $@ + cp -pr $< $@ + + +# Add/update notebooks on Web pages +.PHONY: publish-notebooks publish-notebooks-setup +publish-notebooks: full-notebooks publish-notebooks-setup \ + $(DOCS_TARGET)notebooks/custom.css \ + $(DOCS_TARGET)notebooks/$(BIB) \ + $(DOCS_TARGET)notebooks/LICENSE.md \ + $(DOCS_TARGET)notebooks/README.md \ + $(DOCS:%=$(DOCS_TARGET)notebooks/%.ipynb) \ + $(UTILITY_FILES:%=$(DOCS_TARGET)notebooks/$(UTILS)/%) + +publish-notebooks-setup: + @test -d $(DOCS_TARGET) \ + || $(MKDIR) $(DOCS_TARGET) + @test -d $(DOCS_TARGET)notebooks \ + || $(MKDIR) $(DOCS_TARGET)notebooks + @test -d $(DOCS_TARGET)notebooks/$(UTILS) \ + || $(MKDIR) $(DOCS_TARGET)notebooks/$(UTILS) + +$(DOCS_TARGET)notebooks/%: $(FULL_NOTEBOOKS)/% + cp -pr $< $@ + +.PHONY: publish-index +publish-index: $(DOCS_TARGET)notebooks/00_Index.ipynb + + +# Add/update pics on Web pages +.PHONY: publish-pics publish-pics-setup +publish-pics: publish-pics-setup $(NOTEBOOKS)/PICS + cp -pr $(NOTEBOOKS)/PICS $(DOCS_TARGET)notebooks + +publish-pics-setup: + @test -d $(DOCS_TARGET) 
|| $(MKDIR) $(DOCS_TARGET) + @test -d $(DOCS_TARGET)PICS || $(MKDIR) $(DOCS_TARGET)PICS + $(RM) -fr $(DOCS_TARGET)html/PICS; ln -s ../$(NOTEBOOKS)/PICS $(DOCS_TARGET)html + $(RM) -fr $(DOCS_TARGET)slides/PICS; ln -s ../$(NOTEBOOKS)/PICS $(DOCS_TARGET)slides + + +# Table of contents +.PHONY: toc +toc: $(DOCS_TARGET)notebooks/00_Table_of_Contents.ipynb +$(DOCS_TARGET)notebooks/00_Table_of_Contents.ipynb: $(SHARED)utils/nbtoc.py \ + $(TOC_CHAPTERS:%=$(DOCS_TARGET)notebooks/%) \ + $(TOC_APPENDICES:%=$(DOCS_TARGET)notebooks/%) \ + $(CHAPTERS_MAKEFILE) \ + $(SITEMAP_SVG) + $(RM) $@ + $(PYTHON) $(SHARED)utils/nbtoc.py \ + --title="$(BOOKTITLE)" \ + --chapters="$(TOC_CHAPTERS:%=$(DOCS_TARGET)notebooks/%)" \ + --appendices="$(TOC_APPENDICES:%=$(DOCS_TARGET)notebooks/%)" > $@ + $(EXECUTE_NOTEBOOK) $@ && mv $(FULL_NOTEBOOKS)/00_Table_of_Contents.ipynb $@ + $(PYTHON) $(ADD_METADATA) --project $(PROJECT) $@ > $@~ && mv $@~ $@ + $(JUPYTER) trust $@ + @$(OPEN) $@ + + +# Index +.PHONY: index +index: $(DOCS_TARGET)notebooks/00_Index.ipynb $(DOCS_TARGET)/html/00_Index.html +$(DOCS_TARGET)notebooks/00_Index.ipynb: $(SHARED)utils/nbindex.py \ + $(TOC_CHAPTERS:%=$(DOCS_TARGET)notebooks/%) \ + $(TOC_APPENDICES:%=$(DOCS_TARGET)notebooks/%) \ + $(CHAPTERS_MAKEFILE) + (cd $(NOTEBOOKS); $(PYTHON) ../$(SHARED)utils/nbindex.py $(TOC_CHAPTERS) $(APPENDICES)) > $@ + @$(OPEN) $@ + +## Synopsis +update-synopsis synopsis: + $(PYTHON) $(NBSYNOPSIS) --project $(PROJECT) --update $(ALL_CHAPTER_SOURCES) + $(COMMIT_SYNOPSIS) + +no-synopsis: + @echo Chapters without synopsis: + @grep -L '## Synopsis' $(ALL_CHAPTER_SOURCES) | grep -v '[0-9]' + + +## Python packages +# After this, you can do 'pip install fuzzingbook / debuggingbook' +# and then 'from fuzzingbook.Fuzzer import Fuzzer' :-) +.PHONY: upload-dist +upload-dist: dist + @echo "Use your pypi.org password to upload" + cd $(DOCS_TARGET); twine upload dist/*.tar.gz + + + +## Binder services +# Make sure we have our custom.css in Binder, too +binder/postBuild: binder/postBuild.template $(HTML_TARGET)custom.css + cat binder/postBuild.template $(HTML_TARGET)custom.css > $@ + echo END >> $@ + chmod +x $@ + +# Force recreation of binder service; avoids long waiting times for first user +.PHONY: binder +binder: .FORCE + open $(BINDER_URL) + +# After a git push, we want binder to update; "make push" does this +.PHONY: push +push: .FORCE + git push + open $(BINDER_URL) + open $(PROJECT_URL) + +# Debugging binder +# This is the same system as mybinder uses, but should be easier to debug +# See https://repo2docker.readthedocs.io/en/latest/ +.PRECIOUS: binder/binder.log +.PHONY: binder-local debug-binder +binder-local debug-binder: binder/binder.log binder/postBuild +binder/binder.log: .FORCE + @echo Writing output to $@ + @docker version > /dev/null + jupyter-repo2docker --debug $(GITHUB_REPO) 2>&1 | tee $@ + + +## Docker services (experimental) +docker: + docker pull $(PROJECT)/student + -docker run -d -p 8888:8888 --name fuzzing-book-instance $(PROJECT)/student + +docker-start: + docker start fuzzing-book-instance + sleep 2 + @URL=$$(docker exec -it fuzzing-book-instance jupyter notebook list | grep http | awk '{ print $$1 }'); echo $$URL; open $$URL + +docker-stop: + docker stop fuzzing-book-instance + + +## Getting rid of stray processes and workspaces +kill: + -pkill -HUP -l -f jupyter-lab Firefox.app firefox-bin runserver + $(RM) $$HOME/.jupyter/lab/workspaces/*.jupyterlab-workspace + +## Cleanup +AUX = *.aux *.bbl *.blg *.log *.out *.toc *.frm *.lof *.lot *.fls *.fdb_latexmk \ 
+ $(PDF_TARGET)*.aux \
+ $(PDF_TARGET)*.bbl \
+ $(PDF_TARGET)*.blg \
+ $(PDF_TARGET)*.log \
+ $(PDF_TARGET)*.out \
+ $(PDF_TARGET)*.toc \
+ $(PDF_TARGET)*.frm \
+ $(PDF_TARGET)*.lof \
+ $(PDF_TARGET)*.lot \
+ $(PDF_TARGET)*.fls \
+ $(PDF_TARGET)*.xdv \
+ $(PDF_TARGET)*.fdb_latexmk
+
+.PHONY: clean-code clean-chapters clean-book clean-aux clean-pdf
+clean-code:
+ $(RM) $(PYS) $(PYS_OUT)
+
+clean-chapters:
+ $(RM) $(TEXS) $(PDFS) $(HTMLS) $(SLIDES) $(WORDS) $(MARKDOWNS)
+ $(RM) -r $(PDF_FILES) $(HTML_FILES) $(SLIDES_FILES)
+
+clean-book:
+ $(RM) $(BOOK_TEX) $(BOOK_PDF) $(BOOK_HTML)
+ $(RM) -r $(BOOK_HTML_FILES) $(BOOK_PDF_FILES)
+
+clean-aux clean-pdf:
+ $(RM) $(AUX)
+
+.PHONY: clean-full-notebooks clean-full clean-fulls
+.PHONY: clean-rendered-notebooks clean-rendered clean-renders
+.PHONY: clean-docs clean realclean
+clean-full-notebooks clean-full clean-fulls:
+ $(RM) $(FULLS)
+
+clean-rendered-notebooks clean-rendered clean-renders:
+ $(RM) $(RENDERS)
+
+clean-docs:
+ $(RM) -r $(DOCS_TARGET)html $(DOCS_TARGET)code \
+ $(DOCS_TARGET)slides $(DOCS_TARGET)index.html $(DOCS_TARGET)404.html \
+ $(DOCS_TARGET)PICS $(DOCS_TARGET)notebooks
+
+clean: clean-code clean-chapters clean-book clean-aux clean-docs clean-fulls clean-renders
+ @echo "All derived files deleted"
+
+realclean: clean
+ cd $(PDF_TARGET); $(RM) *.pdf
+ cd $(HTML_TARGET); $(RM) *.html; $(RM) -r *_files
+ cd $(SLIDES_TARGET); $(RM) *.html
+ cd $(CODE_TARGET); $(RM) *.py *.py.out .*.py.out
+ cd $(MYPY_TARGET); $(RM) *.py *.py.out .*.py.out
+ cd $(WORD_TARGET); $(RM) *.docx
+ cd $(MARKDOWN_TARGET); $(RM) *.md
+ @echo "All old files deleted"
+
+
+## A bit of Makefile debugging
+# See http://www.drdobbs.com/tools/debugging-makefiles/197003338#
+
+# Use "make print-VAR" to see the value of VAR, e.g. "make print-NBDEPEND"
+print-%: ; @$(error $* = $($*) (defined as $* = $(value $*) from $(origin $*)))
+
+# Use "make DEBUG=1" to get better diagnostics why a command gets executed
+ifdef DEBUG
+OLD_SHELL := $(SHELL)
+SHELL = $(warning creating $@ from $^: $? is newer)$(OLD_SHELL)
+endif
+
+
+## Dependencies as graph
+NBDEPEND = $(SHARED)utils/nbdepend.py
+SITEMAP_OPTIONS = --graph --transitive-reduction --project $(PROJECT) # --cluster-by-parts
+
+sitemap: $(SITEMAP_SVG)
+$(SITEMAP_SVG): $(CHAPTER_SOURCES) $(NBDEPEND)
+ $(PYTHON) $(NBDEPEND) $(SITEMAP_OPTIONS) $(CHAPTER_SOURCES) > $@~ && mv $@~ $@
+ @$(OPEN) $@
+
+$(HTML_TARGET)/Tours.html: $(SITEMAP_SVG)
+$(FULL_NOTEBOOKS)/Tours.ipynb: $(SITEMAP_SVG)
+$(RENDERED_NOTEBOOKS)/Tours.ipynb: $(SITEMAP_SVG)
+
+$(HTML_TARGET)/00_Table_of_Contents.html: $(SITEMAP_SVG)
+$(FULL_NOTEBOOKS)/00_Table_of_Contents.ipynb: $(SITEMAP_SVG)
+$(RENDERED_NOTEBOOKS)/00_Table_of_Contents.ipynb: $(SITEMAP_SVG)
+
+
+## Dependencies - should come at the very end
+# See http://make.mad-scientist.net/papers/advanced-auto-dependency-generation/ for inspiration
+$(DEPEND_TARGET)%.makefile: $(NOTEBOOKS)/%.ipynb
+ @echo "Rebuilding $@"
+ @test -d $(DEPEND_TARGET) || $(MKDIR) $(DEPEND_TARGET)
+ @for import in $$($(PYTHON) $(NBDEPEND) $<); do \
+ if [ -f $(NOTEBOOKS)/$$import.ipynb ]; then \
+ notebooks="$$notebooks $$""(NOTEBOOKS)/$$import.ipynb"; \
+ imports="$$imports $$""(CODE_TARGET)$$import.py"; \
+ mypys="$$mypys $$""(MYPY_TARGET)$$import.py"; \
+ fi; \
+ done; \
+ ( \
+ echo '# $(basename $(notdir $<)) dependencies'; \
+ echo ''; \
+ echo '$$''(FULL_NOTEBOOKS)/$(notdir $<):' $$notebooks; \
+ echo ''; \
+ echo '$$''(RENDERED_NOTEBOOKS)/$(notdir $<):' $$notebooks; \
+ echo ''; \
+ echo '$$''(CODE_TARGET).$(notdir $(<:%.ipynb=.%.py.out)):' $$imports; \
+ echo ''; \
+ echo '$$''(MYPY_TARGET).$(notdir $(<:%.ipynb=.%.py.out)):' $$mypys; \
+ ) > $@
+
+
+.PHONY: depend
+depend: $(DEPENDS)
+
+include $(wildcard $(DEPENDS))
diff --git a/notebooks/shared/README.md b/notebooks/shared/README.md
new file mode 100644
index 000000000..b899d0d6a
--- /dev/null
+++ b/notebooks/shared/README.md
@@ -0,0 +1 @@
+This folder contains files shared between the fuzzingbook and debuggingbook projects.
\ No newline at end of file
diff --git a/notebooks/shared/Timer.ipynb b/notebooks/shared/Timer.ipynb
new file mode 100644
index 000000000..73034a6a7
--- /dev/null
+++ b/notebooks/shared/Timer.ipynb
@@ -0,0 +1,323 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "button": false,
+ "new_sheet": false,
+ "run_control": {
+ "read_only": false
+ }
+ },
+ "source": [
+ "# Timer\n",
+ "\n",
+ "The code in this notebook helps with measuring time."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "button": false,
+ "new_sheet": false,
+ "run_control": {
+ "read_only": false
+ }
+ },
+ "source": [
+ "**Prerequisites**\n",
+ "\n",
+ "* This notebook needs some understanding of advanced concepts in Python, notably \n",
+ " * classes\n",
+ " * the Python `with` statement\n",
+ " * measuring time"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Synopsis\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "The `Timer` class allows you to measure elapsed real time. Its typical usage is in conjunction with a `with` clause:\n",
+ "\n",
+ "```python\n",
+ "with Timer() as t:\n",
+ " some_long_running_function()\n",
+ "t.elapsed_time()\n",
+ "```\n",
+ "```python\n",
+ "=> 0.042843673028983176\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "button": false,
+ "new_sheet": false,
+ "run_control": {
+ "read_only": false
+ }
+ },
+ "source": [
+ "## Measuring Time\n",
+ "\n",
+ "The class `Timer` allows you to measure the elapsed time during some code execution."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + }, + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "import bookutils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "def clock() -> float:\n", + " try:\n", + " return time.perf_counter() # Python 3\n", + " except:\n", + " return time.clock() # Python 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from types import FrameType, TracebackType\n", + "from typing import Type, Any" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "class Timer(object):\n", + " def __enter__(self) -> Any:\n", + " \"\"\"Begin of `with` block\"\"\"\n", + " self.start_time = clock()\n", + " self.end_time = None\n", + " return self\n", + "\n", + " def __exit__(self, exc_type: Type, exc_value: BaseException,\n", + " tb: TracebackType) -> None:\n", + " \"\"\"End of `with` block\"\"\"\n", + " self.end_time = clock() # type: ignore\n", + "\n", + " def elapsed_time(self) -> float:\n", + " \"\"\"Return elapsed time in seconds\"\"\"\n", + " if self.end_time is None:\n", + " # still running\n", + " return clock() - self.start_time\n", + " else:\n", + " return self.end_time - self.start_time # type: ignore" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "Here's an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "def some_long_running_function() -> None:\n", + " i = 1000000\n", + " while i > 0:\n", + " i -= 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "print(\"Stopping total time:\")\n", + "with Timer() as t:\n", + " some_long_running_function()\n", + "print(t.elapsed_time())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "outputs": [], + "source": [ + "print(\"Stopping time in between:\")\n", + "with Timer() as t:\n", + " for i in range(10):\n", + " print(t.elapsed_time())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": false, + "run_control": { + "read_only": false + } + }, + "source": [ + "That's it, folks – enjoy!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synopsis\n", + "\n", + "The `Timer` class allows you to measure elapsed real time. 
Its typical usage is in conjunction with a `with` clause:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with Timer() as t:\n", + " some_long_running_function()\n", + "t.elapsed_time()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "button": false, + "new_sheet": true, + "run_control": { + "read_only": false + } + }, + "source": [ + "## Lessons Learned\n", + "\n", + "* With the `Timer` class, it is very easy to measure elapsed time." + ] + } + ], + "metadata": { + "ipub": { + "bibliography": "fuzzingbook.bib", + "toc": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "toc-autonumbering": false + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/bookutils/PrettyTable.py b/notebooks/shared/bookutils/PrettyTable.py similarity index 100% rename from notebooks/bookutils/PrettyTable.py rename to notebooks/shared/bookutils/PrettyTable.py diff --git a/notebooks/bookutils/README.md b/notebooks/shared/bookutils/README.md similarity index 100% rename from notebooks/bookutils/README.md rename to notebooks/shared/bookutils/README.md diff --git a/notebooks/bookutils/__init__.py b/notebooks/shared/bookutils/__init__.py similarity index 100% rename from notebooks/bookutils/__init__.py rename to notebooks/shared/bookutils/__init__.py diff --git a/notebooks/bookutils/export_notebook_code.py b/notebooks/shared/bookutils/export_notebook_code.py similarity index 100% rename from notebooks/bookutils/export_notebook_code.py rename to notebooks/shared/bookutils/export_notebook_code.py diff --git a/notebooks/bookutils/set_fixed_seed.py b/notebooks/shared/bookutils/set_fixed_seed.py similarity index 100% rename from notebooks/bookutils/set_fixed_seed.py rename to notebooks/shared/bookutils/set_fixed_seed.py diff --git a/notebooks/shared/fuzzingbook.bib b/notebooks/shared/fuzzingbook.bib new file mode 100644 index 000000000..7a3e36415 --- /dev/null +++ b/notebooks/shared/fuzzingbook.bib @@ -0,0 +1,1756 @@ +@comment{ fuzzingbook bibliography } +@comment{ All entries must have a 'url' entry the HTML version can link to! } + +@comment{ Define common abbreviations for non-BibTeX conversion } +@string{ jan = "January" } +@string{ feb = "February" } +@string{ mar = "March" } +@string{ apr = "April" } +@string{ may = "May" } +@string{ jun = "June" } +@string{ jul = "July" } +@string{ aug = "August" } +@string{ sep = "September" } +@string{ oct = "October" } +@string{ nov = "November" } +@string{ dec = "December" } + +@article{Purdom1972, + year={1972}, + issn={0006-3835}, + journal={BIT Numerical Mathematics}, + volume={12}, + number={3}, + doi={10.1007/BF01932308}, + title={A sentence generator for testing parsers}, + url={http://dx.doi.org/10.1007/BF01932308}, + publisher={Kluwer Academic Publishers}, + author={Purdom, Paul}, + pages={366-375}, + language={English} +} + +@article{Miller1990, + author = {Miller, Barton P. 
and Fredriksen, Louis and So, Bryan},
+ title = {An Empirical Study of the Reliability of {UNIX} Utilities},
+ journal = {Commun. ACM},
+ issue_date = {Dec. 1990},
+ volume = {33},
+ number = {12},
+ month = dec,
+ year = {1990},
+ issn = {0001-0782},
+ pages = {32--44},
+ numpages = {13},
+ url = {http://doi.acm.org/10.1145/96267.96279},
+ doi = {10.1145/96267.96279},
+ acmid = {96279},
+ publisher = {ACM},
+ address = {New York, NY, USA}
+}
+
+@book{Pezze2008,
+ title={Software Testing and Analysis: Process, Principles, and Techniques},
+ author={Pezz{\`e}, Mauro and Young, Michal},
+ year={2008},
+ publisher={John Wiley \& Sons},
+ url={http://ix.cs.uoregon.edu/~michal/book/},
+}
+
+@article{Luke2000,
+ author = {Luke, S.},
+ title = {Two Fast Tree-creation Algorithms for Genetic Programming},
+ journal = {IEEE Transactions on Evolutionary Computation},
+ issue_date = {September 2000},
+ volume = {4},
+ number = {3},
+ month = sep,
+ year = {2000},
+ issn = {1089-778X},
+ pages = {274--283},
+ numpages = {10},
+ url = {https://doi.org/10.1109/4235.873237},
+ doi = {10.1109/4235.873237},
+ acmid = {2221499},
+ publisher = {IEEE Press},
+ address = {Piscataway, NJ, USA},
+}
+
+@book{fuzzingbook,
+ author = {Andreas Zeller and Rahul Gopinath and Marcel B{\"o}hme and Gordon Fraser and Christian Holler},
+ booktitle = {The Fuzzing Book},
+ title = {The Fuzzing Book},
+ howpublished = {\url{https://www.fuzzingbook.org/}},
+ note = {Retrieved 2019-09-09 13:49:23+02:00},
+ url = {https://www.fuzzingbook.org/},
+ urldate = {2019-09-09 13:49:23+02:00}
+}
+
+@Article{Burkhardt1967,
+author="Burkhardt, W. H.",
+title="Generating test programs from syntax",
+journal="Computing",
+year="1967",
+month="Mar",
+day="01",
+volume="2",
+number="1",
+pages="53--73",
+abstract="The many faces of programming and systems development demand an immense amount of mechanical routine work. The present paper tries to explain some areas where automation of many tasks may be of great help. One special area, where progress seems to lag behind unduly, can be found in debugging, testing, and diagnosing systems. Here we attempted the generation of programs automatically from a definition of a problem and the characteristics of programs for its solution by a software system, which has been specially designed for this purpose. It has been indicated how the ideas underlying this project may be applied successfully to other areas.",
+issn="1436-5057",
+doi="10.1007/BF02235512",
+url="https://doi.org/10.1007/BF02235512"
+}
+
+@inproceedings{Slutz1998,
+ author = {Slutz, Donald R.},
+ title = {Massive Stochastic Testing of SQL},
+ booktitle = {Proceedings of the 24th International Conference on Very Large Data Bases},
+ series = {VLDB '98},
+ year = {1998},
+ isbn = {1-55860-566-5},
+ pages = {618--622},
+ numpages = {5},
+ original_url = {http://dl.acm.org/citation.cfm?id=645924.671199},
+ url = {https://www.microsoft.com/en-us/research/publication/massive-stochastic-testing-of-sql/},
+ acmid = {671199},
+ publisher = {Morgan Kaufmann Publishers Inc.},
+ address = {San Francisco, CA, USA},
+}
+
+@article{Zeller2002,
+ author = {Zeller, Andreas and Hildebrandt, Ralf},
+ title = {Simplifying and Isolating Failure-Inducing Input},
+ journal = {IEEE Trans. Softw.
Eng.}, + issue_date = {February 2002}, + volume = {28}, + number = {2}, + month = feb, + year = {2002}, + issn = {0098-5589}, + pages = {183--200}, + numpages = {18}, + url = {http://dx.doi.org/10.1109/32.988498}, + doi = {10.1109/32.988498}, + acmid = {506206}, + publisher = {IEEE Press}, + address = {Piscataway, NJ, USA}, + keywords = {Automated debugging, debugging aids, testing tools, combinatorial testing, diagnostics, tracing.}, +} + +@book{Kernighan1999, + author = {Kernighan, Brian W. and Pike, Rob}, + title = {The Practice of Programming}, + year = {1999}, + isbn = {0-201-61586-X}, + publisher = {Addison-Wesley Longman Publishing Co., Inc.}, + address = {Boston, MA, USA}, +} + +@book{Panini350bce, +author = {Dak{\d{s}}iputra P{\=a}{\d{n}}ini}, +title = {Ash{\d{t}}{\=a}dhy{\=a}y{\=i}}, +publisher = {Sanskrit Oral Tradition}, +year = {350 BCE}, +url = {https://en.wikipedia.org/wiki/P%C4%81%E1%B9%87ini%23A%E1%B9%A3%E1%B9%AD%C4%81dhy%C4%81y%C4%AB}, +urldate = {2018-10-10 12:15:00+02:00} +} + +@article{Petke2015, +author={J. Petke and M. B. Cohen and M. Harman and S. Yoo}, +journal={IEEE Transactions on Software Engineering}, +title={Practical Combinatorial Interaction Testing: Empirical Findings on Efficiency and Early Fault Detection}, +year={2015}, +volume={41}, +number={9}, +pages={901-924}, +keywords={genetic algorithms;greedy algorithms;program testing;simulated annealing;software fault tolerance;combinatorial interaction testing;early fault detection;software system configuration space;simulated annealing;SA;greedy algorithm;CIT test suite generation;constraint handling;pairwise testing;genetic algorithm;Testing;Simulated annealing;Genetic algorithms;Fault detection;Greedy algorithms;Turning;Flexible printed circuits;Combinatorial Interaction Testing;Prioritisation;Empirical Studies;Software Testing;Combinatorial interaction testing;prioritisation;empirical studies;software testing}, +doi={10.1109/TSE.2015.2421279}, +ISSN={0098-5589}, +month={Sept},} + +@inproceedings{Herfert2017, + author = {Herfert, Satia and Patra, Jibesh and Pradel, Michael}, + title = {Automatically Reducing Tree-structured Test Inputs}, + booktitle = {Proceedings of the 32Nd IEEE/ACM International Conference on Automated Software Engineering}, + series = {ASE 2017}, + year = {2017}, + isbn = {978-1-5386-2684-9}, + location = {Urbana-Champaign, IL, USA}, + pages = {861--871}, + numpages = {11}, + url = {http://dl.acm.org/citation.cfm?id=3155562.3155669}, + acmid = {3155669}, + publisher = {IEEE Press}, + address = {Piscataway, NJ, USA}, +} + +@article{redziejowski2008, + author = {Redziejowski, Roman R.}, + title = {Some Aspects of Parsing Expression Grammar}, + journal = {Fundam. Inf.}, + issue_date = {January 2008}, + volume = {85}, + number = {1-4}, + month = jan, + year = {2008}, + issn = {0169-2968}, + pages = {441--451}, + numpages = {11}, + url = {http://dl.acm.org/citation.cfm?id=2365896.2365924}, + acmid = {2365924}, + publisher = {IOS Press}, + address = {Amsterdam, The Netherlands, The Netherlands}, +} + +@article{Valiant1975, + author = {Valiant, Leslie G.}, + title = {General Context-free Recognition in Less Than Cubic Time}, + journal = {J. Comput. Syst. 
Sci.},
+ issue_date = {April, 1975},
+ volume = {10},
+ number = {2},
+ month = apr,
+ year = {1975},
+ issn = {0022-0000},
+ pages = {308--315},
+ numpages = {8},
+ url = {http://dx.doi.org/10.1016/S0022-0000(75)80046-8},
+ doi = {10.1016/S0022-0000(75)80046-8},
+ acmid = {1740048},
+ publisher = {Academic Press, Inc.},
+ address = {Orlando, FL, USA},
+}
+
+@article{Lee2002,
+ author = {Lee, Lillian},
+ title = {Fast Context-free Grammar Parsing Requires Fast Boolean Matrix Multiplication},
+ journal = {J. ACM},
+ issue_date = {January 2002},
+ volume = {49},
+ number = {1},
+ month = jan,
+ year = {2002},
+ issn = {0004-5411},
+ pages = {1--15},
+ numpages = {15},
+ url = {http://doi.acm.org/10.1145/505241.505242},
+ doi = {10.1145/505241.505242},
+ acmid = {505242},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {Boolean matrix multiplication, context-free grammar parsing},
+}
+
+@inproceedings{LeGall2014,
+ author = {Le Gall, Fran\c{c}ois},
+ title = {Powers of Tensors and Fast Matrix Multiplication},
+ booktitle = {Proceedings of the 39th International Symposium on Symbolic and Algebraic Computation},
+ series = {ISSAC '14},
+ year = {2014},
+ isbn = {978-1-4503-2501-1},
+ location = {Kobe, Japan},
+ pages = {296--303},
+ numpages = {8},
+ url = {http://doi.acm.org/10.1145/2608628.2608664},
+ doi = {10.1145/2608628.2608664},
+ acmid = {2608664},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {algebraic complexity theory, matrix multiplication},
+}
+
+@article{Hopcroft2001,
+ title={Introduction to automata theory, languages, and computation},
+ author={Hopcroft, John E and Motwani, Rajeev and Ullman, Jeffrey D},
+ journal={ACM SIGACT News},
+ volume={32},
+ number={1},
+ pages={60--65},
+ year={2001},
+ publisher={ACM}
+}
+
+@book{Myers2004,
+ author = {Myers, Glenford J. and Sandler, Corey},
+ title = {The Art of Software Testing},
+ year = {2004},
+ isbn = {0471469122},
+ publisher = {John Wiley \& Sons, Inc.},
+ url = {https://dl.acm.org/citation.cfm?id=983238},
+ address = {USA},
+}
+
+@book{Beizer1990,
+ author = {Beizer, Boris},
+ title = {Software Testing Techniques},
+ year = {1990},
+ isbn = {0442245920},
+ publisher = {John Wiley \& Sons, Inc.},
+ url = {https://dl.acm.org/citation.cfm?id=79060},
+ address = {New York, NY, USA},
+}
+
+@book{Sutton2007,
+ author = {Sutton, Michael and Greene, Adam and Amini, Pedram},
+ title = {Fuzzing: Brute Force Vulnerability Discovery},
+ year = {2007},
+ isbn = {0321446119},
+ url = {http://www.fuzzing.org/},
+ publisher = {Addison-Wesley Professional},
+}
+
+@book{Takanen2008,
+ author = {Takanen, Ari and DeMott, Jared and Miller, Charlie},
+ title = {Fuzzing for Software Security Testing and Quality Assurance},
+ year = {2008},
+ isbn = {1596932147, 9781596932142},
+ edition = {1},
+ publisher = {Artech House, Inc.},
+ url = {http://us.artechhouse.com/Fuzzing-for-Software-Security-Testing-and-Quality-Assurance-Second-Edition-P1930.aspx},
+ address = {Norwood, MA, USA},
+}
+
+@article{Dai2010,
+ author = {Dai, Huning and Murphy, Christian and Kaiser, Gail},
+ title = {{CONFU}: Configuration Fuzzing Testing Framework for Software Vulnerability Detection},
+ journal = {Int. J. Secur. Softw.
Eng.}, + issue_date = {July 2010}, + volume = {1}, + number = {3}, + month = jul, + year = {2010}, + issn = {1947-3036}, + pages = {41--55}, + numpages = {15}, + url = {http://dx.doi.org/10.4018/jsse.2010070103}, + doi = {10.4018/jsse.2010070103}, + acmid = {2441117}, + publisher = {IGI Global}, + address = {Hershey, PA, USA}, + keywords = {Configuration Fuzzing, Fuzz Testing, In Vivo Testing, Security Invariants, Vulnerability}, +} + +@article{Earley1970, + author = {Earley, Jay}, + title = {An Efficient Context-free Parsing Algorithm}, + journal = {Commun. ACM}, + issue_date = {Feb 1970}, + volume = {13}, + number = {2}, + month = feb, + year = {1970}, + issn = {0001-0782}, + pages = {94--102}, + numpages = {9}, + url = {http://doi.acm.org/10.1145/362007.362035}, + doi = {10.1145/362007.362035}, + acmid = {362035}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {compilers, computational complexity, context-free grammar, parsing, syntax analysis}, +} + +@article{Aycock2002, + title={Practical Earley Parsing}, + author={John Aycock and R. Nigel Horspool}, + journal={The Computer Journal}, + year={2002}, + volume={45}, + pages={620-630} +} + +@article{Leo1991, +title = "A general context-free parsing algorithm running in linear time on every {LR(k)} grammar without using lookahead", +journal = "Theoretical Computer Science", +volume = "82", +number = "1", +pages = "165 - 176", +year = "1991", +issn = "0304-3975", +doi = "https://doi.org/10.1016/0304-3975(91)90180-A", +url = "http://www.sciencedirect.com/science/article/pii/030439759190180A", +author = "Joop M.I.M. Leo" +} + +@inproceedings{Elbaum2006, + author = {Elbaum, Sebastian and Chin, Hui Nee and Dwyer, Matthew B. and Dokulil, Jonathan}, + title = {Carving Differential Unit Test Cases from System Test Cases}, + booktitle = {Proceedings of the 14th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, + series = {SIGSOFT '06/FSE-14}, + year = {2006}, + isbn = {1-59593-468-5}, + location = {Portland, Oregon, USA}, + pages = {253--264}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/1181775.1181806}, + doi = {10.1145/1181775.1181806}, + acmid = {1181806}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {automated test generation, carving and replay, regression testing}, +} + +@inproceedings{Lin2008, + author = {Lin, Zhiqiang and Zhang, Xiangyu}, + title = {Deriving Input Syntactic Structure from Execution}, + booktitle = {Proceedings of the 16th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, + series = {SIGSOFT '08/FSE-16}, + year = {2008}, + isbn = {978-1-59593-995-1}, + location = {Atlanta, Georgia}, + pages = {83--93}, + numpages = {11}, + url = {http://doi.acm.org/10.1145/1453101.1453114}, + doi = {10.1145/1453101.1453114}, + acmid = {1453114}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {bottom-up grammar, control dependence, input lineage, reverse engineering, syntax tree, top-down grammar}, +} +@article{Ford2004, + author = {Ford, Bryan}, + title = {Parsing Expression Grammars: A Recognition-based Syntactic Foundation}, + journal = {SIGPLAN Not.}, + issue_date = {January 2004}, + volume = {39}, + number = {1}, + month = jan, + year = {2004}, + issn = {0362-1340}, + pages = {111--122}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/982962.964011}, + doi = {10.1145/982962.964011}, + acmid = {964011}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {BNF, GTDPL, TDPL, context-free 
grammars, lexical analysis, packrat parsing, parsing expression grammars, regular expressions, scannerless parsing, syntactic predicates, unified grammars},
+}
+@article{Ford2002,
+ author = {Ford, Bryan},
+ title = {Packrat Parsing: Simple, Powerful, Lazy, Linear Time, Functional Pearl},
+ journal = {SIGPLAN Not.},
+ issue_date = {September 2002},
+ volume = {37},
+ number = {9},
+ month = sep,
+ year = {2002},
+ issn = {0362-1340},
+ pages = {36--47},
+ numpages = {12},
+ url = {http://doi.acm.org/10.1145/583852.581483},
+ doi = {10.1145/583852.581483},
+ acmid = {581483},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {Haskell, backtracking, lexical analysis, memoization, parser combinators, scannerless parsing, top-down parsing},
+}
+
+@inproceedings{Holler2012,
+ author = {Holler, Christian and Herzig, Kim and Zeller, Andreas},
+ title = {Fuzzing with Code Fragments},
+ booktitle = {Proceedings of the 21st USENIX Conference on Security Symposium},
+ series = {Security'12},
+ year = {2012},
+ location = {Bellevue, WA},
+ pages = {38--38},
+ numpages = {1},
+ url = {https://www.usenix.org/system/files/conference/usenixsecurity12/sec12-final73.pdf},
+ acmid = {2362831},
+ publisher = {USENIX Association},
+ address = {Berkeley, CA, USA},
+}
+
+@article{Newcomb1881,
+ author = {Simon Newcomb},
+ title = {Note on the frequency of use of the different digits in natural numbers},
+ journal = {American Journal of Mathematics},
+ volume = {4},
+ number = {1--4},
+ pages = {39--40},
+ year = {1881},
+ url = {http://www.jstor.org/stable/2369148},
+}
+
+@article{Benford1938,
+ author = "Frank Benford",
+ title = "The Law of Anomalous Numbers",
+ journal = "Proceedings of the American Philosophical Society",
+ volume = "78",
+ number = "4",
+ pages = "551--572",
+ month = mar,
+ year = "1938",
+ url = {http://links.jstor.org/sici?sici=0003-049X%2819380331%2978%3A4%3C551%3ATLOAN%3E2.0.CO%3B2-G},
+}
+
+@article{Chomsky1956,
+ author = {Chomsky, Noam},
+ title = {Three models for the description of language},
+ journal = {IRE Transactions on Information Theory},
+ pages = {113--124},
+ volume = 2,
+ year = 1956,
+ url = {https://chomsky.info/wp-content/uploads/195609-.pdf}
+}
+
+@article{Hanford1970,
+ author = {Hanford, Kenneth V.},
+ title = {Automatic Generation of Test Cases},
+ journal = {IBM Syst.
J.}, + issue_date = {December 1970}, + volume = {9}, + number = {4}, + month = dec, + year = {1970}, + issn = {0018-8670}, + pages = {242--257}, + numpages = {16}, + url = {http://dx.doi.org/10.1147/sj.94.0242}, + doi = {10.1147/sj.94.0242}, + acmid = {1663480}, + publisher = {IBM Corp.}, + address = {Riverton, NJ, USA}, +} + +@inproceedings{Yang2011, + author = {Yang, Xuejun and Chen, Yang and Eide, Eric and Regehr, John}, + title = {Finding and Understanding Bugs in {C} Compilers}, + booktitle = {Proceedings of the 32Nd ACM SIGPLAN Conference on Programming Language Design and Implementation}, + series = {PLDI '11}, + year = {2011}, + isbn = {978-1-4503-0663-8}, + location = {San Jose, California, USA}, + pages = {283--294}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/1993498.1993532}, + doi = {10.1145/1993498.1993532}, + acmid = {1993532}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {automated testing, compiler defect, compiler testing, random program generation, random testing}, +} + +@inproceedings{Le2014, + author = {Le, Vu and Afshari, Mehrdad and Su, Zhendong}, + title = {Compiler Validation via Equivalence Modulo Inputs}, + booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming Language Design and Implementation}, + series = {PLDI '14}, + year = {2014}, + isbn = {978-1-4503-2784-8}, + location = {Edinburgh, United Kingdom}, + pages = {216--226}, + numpages = {11}, + url = {http://doi.acm.org/10.1145/2594291.2594334}, + doi = {10.1145/2594291.2594334}, + acmid = {2594334}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {automated testing, compiler testing, equivalent program variants, miscompilation}, +} + +@book{Aho2006, + author = {Aho, Alfred V. and Lam, Monica S. and Sethi, Ravi and Ullman, Jeffrey D.}, + title = {Compilers: Principles, Techniques, and Tools (2nd edition)}, + year = {2006}, + isbn = {0321486811}, + publisher = {Addison-Wesley Longman Publishing Co., Inc.}, + url = {https://www.pearson.com/us/higher-education/program/Aho-Compilers-Principles-Techniques-and-Tools-2nd-Edition/PGM167067.html}, + address = {Boston, MA, USA}, +} + + +@inproceedings{Hodovan2018, + title = {Grammarinator: A Grammar-based Open Source Fuzzer}, + author = {Hodov{\'a}n, Ren{\'a}ta and Kiss, {\'A}kos and Tibor Gyim{\'o}thy}, + booktitle = {Proceedings of the 9th Workshop on Automating Test Case Design, Selection and Evaluation (A-TEST 2018)}, + year = {2018}, + month = nov, + url = {https://www.researchgate.net/publication/328510752_Grammarinator_a_grammar-based_open_source_fuzzer}, + address = {Lake Buena Vista, Florida, USA}, +} + +@article{ogden1968helpful, + title={A helpful result for proving inherent ambiguity}, + author={Ogden, William}, + journal={Mathematical systems theory}, + volume={2}, + number={3}, + pages={191--194}, + year={1968}, + publisher={Springer} +} + +@article{scott2010gll, + title={GLL parsing}, + author={Scott, Elizabeth and Johnstone, Adrian}, + journal={Electronic Notes in Theoretical Computer Science}, + volume={253}, + number={7}, + pages={177--189}, + year={2010}, + publisher={Elsevier} +} + +@book{tomita2012generalized, + title={Generalized LR parsing}, + author={Tomita, Masaru}, + year={2012}, + publisher={Springer Science \& Business Media} +} + +@article{tomita1987efficient, + title={An efficient augmented-context-free parsing algorithm}, + author={Tomita, Masaru}, + journal={Computational linguistics}, + volume={13}, + number={1-2}, + pages={31--46}, + year={1987}, + publisher={MIT 
Press} +} + +@book{grune2008parsing, + title={Parsing Techniques: A Practical Guide}, + author={Grune, Dick and Jacobs, Ceriel J.H.}, + year={2008}, + publisher={Springer} +} + +@inproceedings{pingali2015graphical, + title={A Graphical Model for Context-Free Grammar Parsing}, + author={Pingali, Keshav and Bilardi, Gianfranco}, + booktitle={International Conference on Compiler Construction}, + pages={3--27}, + year={2015}, + organization={Springer} +} + +@article{qi2018generalized, + title={Generalized Earley Parser: Bridging Symbolic Grammars and Sequence Data for Future Prediction}, + author={Qi, Siyuan and Jia, Baoxiong and Zhu, Song-Chun}, + journal={arXiv preprint arXiv:1806.03497}, + year={2018} +} + +@article{bar1961formal, + title={On formal properties of simple phrase structure grammars}, + author={Bar-Hillel, Yehoshua and Perles, Micha and Shamir, Eli}, + journal={STUF-Language Typology and Universals}, + volume={14}, + number={1-4}, + pages={143--172}, + year={1961}, + publisher={AKADEMIE VERLAG} +} + +@techreport{Patra2016, + title={Learning to fuzz: Application-independent fuzz testing with probabilistic, generative models of input data}, + author={Patra, Jibesh and Pradel, Michael}, + institution = {TU Darmstadt, Department of Computer Science}, + number = {TUD-CS-2016-14664}, + url = {http://mp.binaervarianz.de/TreeFuzz_TR_Nov2016.pdf}, + year={2016} +} + +@inproceedings{Claessen2000, + author = {Claessen, Koen and Hughes, John}, + title = {QuickCheck: A Lightweight Tool for Random Testing of Haskell Programs}, + booktitle = {Proceedings of the Fifth ACM SIGPLAN International Conference on Functional Programming}, + series = {ICFP '00}, + year = {2000}, + isbn = {1-58113-202-6}, + pages = {268--279}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/351240.351266}, + doi = {10.1145/351240.351266}, + acmid = {351266}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@inproceedings{Misherghi2006, + author = {Misherghi, Ghassan and Su, Zhendong}, + title = {{HDD}: Hierarchical Delta Debugging}, + booktitle = {Proceedings of the 28th International Conference on Software Engineering}, + series = {ICSE '06}, + year = {2006}, + isbn = {1-59593-375-1}, + location = {Shanghai, China}, + pages = {142--151}, + numpages = {10}, + url = {http://doi.acm.org/10.1145/1134285.1134307}, + doi = {10.1145/1134285.1134307}, + acmid = {1134307}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {automated debugging, delta debugging}, +} + +@inproceedings{Regehr2012, + author = {Regehr, John and Chen, Yang and Cuoq, Pascal and Eide, Eric and Ellison, Chucky and Yang, Xuejun}, + title = {Test-case Reduction for C Compiler Bugs}, + booktitle = {Proceedings of the 33rd ACM SIGPLAN Conference on Programming Language Design and Implementation}, + series = {PLDI '12}, + year = {2012}, + isbn = {978-1-4503-1205-9}, + location = {Beijing, China}, + pages = {335--346}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/2254064.2254104}, + doi = {10.1145/2254064.2254104}, + acmid = {2254104}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {automated testing, bug reporting, compiler defect, compiler testing, random testing, test-case minimization}, +} + +@techreport{Pavese2018, + author = {Esteban Pavese and Ezekiel Soremekun and Nikolas Havrikov and Lars Grunske and Andreas Zeller}, + title = {Inputs from Hell: Generating Uncommon Inputs from Common Samples}, + institution = {CISPA Helmholtz Center for Information Security}, + url =
{http://arxiv.org/abs/1812.07525}, + year={2018} +} + +@inproceedings{Hoschele2017, + author = {H{\"o}schele, Matthias and Zeller, Andreas}, + title = {Mining Input Grammars with AUTOGRAM}, + booktitle = {Proceedings of the 39th International Conference on Software Engineering Companion}, + series = {ICSE-C '17}, + year = {2017}, + isbn = {978-1-5386-1589-8}, + location = {Buenos Aires, Argentina}, + pages = {31--34}, + numpages = {4}, + url = {https://doi.org/10.1109/ICSE-C.2017.14}, + doi = {10.1109/ICSE-C.2017.14}, + acmid = {3098355}, + publisher = {IEEE Press}, + address = {Piscataway, NJ, USA}, + keywords = {context-free grammars, dynamic tainting, fuzzing, input formats}, +} + +@techreport{Kampmann2018, + title={Carving Parameterized Unit Tests}, + institution={CISPA Helmholtz Center for Information Security}, + author={Kampmann, Alexander and Zeller, Andreas}, + journal={arXiv preprint arXiv:1812.07932}, + url={https://arxiv.org/abs/1812.07932}, + month=dec, + year={2018} +} + +@book{higuera2010grammatical, + title={Grammatical inference: learning automata and grammars}, + author={De la Higuera, Colin}, + year={2010}, + publisher={Cambridge University Press} +} + +@article{clark2013learning, + title={Learning trees from strings: A strong learning algorithm for some context-free grammars}, + author={Clark, Alexander}, + journal={The Journal of Machine Learning Research}, + volume={14}, + number={1}, + pages={3537--3559}, + year={2013}, + publisher={JMLR. org} +} + +@article{king1976symbolic, + author = {King, James C.}, + title = {Symbolic Execution and Program Testing}, + journal = {Commun. ACM}, + issue_date = {July 1976}, + volume = {19}, + number = {7}, + month = jul, + year = {1976}, + issn = {0001-0782}, + pages = {385--394}, + numpages = {10}, + url = {http://doi.acm.org/10.1145/360248.360252}, + doi = {10.1145/360248.360252}, + acmid = {360252}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@inproceedings{wang2017angr, + title={Angr-The Next Generation of Binary Analysis}, + author={Wang, Fish and Shoshitaishvili, Yan}, + booktitle={Cybersecurity Development (SecDev), 2017 IEEE}, + pages={8--9}, + year={2017}, + organization={IEEE} +} + +@article{godefroid2012sage, + title={{SAGE}: whitebox fuzzing for security testing}, + author={Godefroid, Patrice and Levin, Michael Y and Molnar, David}, + journal={Queue}, + volume={10}, + number={1}, + pages={20}, + year={2012}, + publisher={ACM} +} + +@inproceedings{stephens2016driller, + title={Driller: Augmenting Fuzzing Through Selective Symbolic Execution.}, + author={Stephens, Nick and Grosen, John and Salls, Christopher and Dutcher, Andrew and Wang, Ruoyu and Corbetta, Jacopo and Shoshitaishvili, Yan and Kruegel, Christopher and Vigna, Giovanni}, + booktitle={NDSS}, + volume={16}, + pages={1--16}, + year={2016} +} + +@inproceedings{Memon2001, + author = {Memon, Atif M. 
and Soffa, Mary Lou and Pollack, Martha E.}, + title = {Coverage Criteria for {GUI} Testing}, + booktitle = {Proceedings of the 8th European Software Engineering Conference Held Jointly with 9th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, + series = {ESEC/FSE-9}, + year = {2001}, + isbn = {1-58113-390-1}, + location = {Vienna, Austria}, + pages = {256--267}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/503209.503244}, + doi = {10.1145/503209.503244}, + acmid = {503244}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {GUI test coverage, GUI testing, component testing, event-based coverage, event-flow graph, integration tree}, +} + +@inproceedings{Memon2003, + author = {Memon, Atif and Banerjee, Ishan and Nagarajan, Adithya}, + title = {{GUI} Ripping: Reverse Engineering of Graphical User Interfaces for Testing}, + booktitle = {Proceedings of the 10th Working Conference on Reverse Engineering}, + series = {WCRE '03}, + year = {2003}, + isbn = {0-7695-2027-8}, + pages = {260--}, + url = {http://dl.acm.org/citation.cfm?id=950792.951350}, + acmid = {951350}, + publisher = {IEEE Computer Society}, + address = {Washington, DC, USA}, +} + +@article{Mesbah2012, + author = {Mesbah, Ali and van Deursen, Arie and Lenselink, Stefan}, + title = {Crawling Ajax-Based Web Applications Through Dynamic Analysis of User Interface State Changes}, + journal = {ACM Trans. Web}, + issue_date = {March 2012}, + volume = {6}, + number = {1}, + month = mar, + year = {2012}, + issn = {1559-1131}, + pages = {3:1--3:30}, + articleno = {3}, + numpages = {30}, + url = {http://doi.acm.org/10.1145/2109205.2109208}, + doi = {10.1145/2109205.2109208}, + acmid = {2109208}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {Ajax, Crawling, DOM crawling, Web 2.0, dynamic analysis, hidden web}, +} + + +@inproceedings{Conti2010, + author = {Conti, Juan Jos{\'e} and Russo, Alejandro}, + title = {A Taint Mode for Python via a Library}, + booktitle = {Proceedings of the 15th Nordic Conference on Information Security Technology for Applications}, + series = {NordSec'10}, + year = {2012}, + isbn = {978-3-642-27936-2}, + location = {Espoo, Finland}, + pages = {210--222}, + numpages = {13}, + url = {http://dx.doi.org/10.1007/978-3-642-27937-9_15}, + doi = {10.1007/978-3-642-27937-9_15}, + acmid = {2341484}, + publisher = {Springer-Verlag}, + address = {Berlin, Heidelberg}, +} + +@book{siever1999perl, + title={Perl in a Nutshell}, + author={Siever, Ellen and Spainhour, Stephen and Patwardhan, Nathan}, + year={1999}, + publisher={O'Reilly \& Associates, Inc.} +} + +@article{Barsotti2018, + title = {{PEF}: Python Error Finder}, + journal = {Electronic Notes in Theoretical Computer Science}, + volume = {339}, + pages = {21--41}, + year = {2018}, + note = {The XLII Latin American Computing Conference}, + issn = {1571-0661}, + doi = {10.1016/j.entcs.2018.06.003}, + url = {http://www.sciencedirect.com/science/article/pii/S1571066118300471}, + author = {Dami{\'a}n Barsotti and Andr{\'e}s M. Bordese and Tom{\'a}s Hayes}, +} + +@techreport{PeerCheck, + title = {A peer architecture for lightweight symbolic execution}, + author = {A. Bruni and T. Disney and C.
Flanagan}, + institution = {University of California, Santa Cruz}, + year = {2011}, + url = {https://hoheinzollern.files.wordpress.com/2008/04/seer1.pdf} +} + +@inproceedings{Larson2003, + author = {Larson, Eric and Austin, Todd}, + title = {High Coverage Detection of Input-related Security Faults}, + booktitle = {Proceedings of the 12th Conference on USENIX Security Symposium - Volume 12}, + series = {SSYM'03}, + year = {2003}, + location = {Washington, DC}, + pages = {9--9}, + numpages = {1}, + url = {http://dl.acm.org/citation.cfm?id=1251353.1251362}, + acmid = {1251362}, + publisher = {USENIX Association}, + address = {Berkeley, CA, USA}, +} + +@inproceedings{cadar2005execution, + title={Execution generated test cases: How to make systems code crash itself}, + author={Cadar, Cristian and Engler, Dawson}, + booktitle={International SPIN Workshop on Model Checking of Software}, + pages={2--23}, + year={2005}, + organization={Springer} +} + +@article{Ernst2001, + author = {Ernst, Michael D. and Cockrell, Jake and Griswold, William G. and Notkin, David}, + title = {Dynamically Discovering Likely Program Invariants to Support Program Evolution}, + journal = {IEEE Trans. Softw. Eng.}, + issue_date = {February 2001}, + volume = {27}, + number = {2}, + month = feb, + year = {2001}, + issn = {0098-5589}, + pages = {99--123}, + numpages = {25}, + doi = {10.1109/32.908957}, + acmid = {373397}, + publisher = {IEEE Press}, + address = {Piscataway, NJ, USA}, + keywords = {Program invariants, formal specification, software evolution, dynamic analysis, execution traces, logical inference, pattern recognition.}, + url = {https://homes.cs.washington.edu/~mernst/pubs/invariants-tse2001.pdf} +} + +@inproceedings{Pacheco2005, + author = {Pacheco, Carlos and Ernst, Michael D.}, + title = {Eclat: Automatic Generation and Classification of Test Inputs}, + booktitle = {Proceedings of the 19th European Conference on Object-Oriented Programming}, + series = {ECOOP'05}, + year = {2005}, + isbn = {3-540-27992-X, 978-3-540-27992-1}, + location = {Glasgow, UK}, + pages = {504--527}, + numpages = {24}, + doi = {10.1007/11531142_22}, + acmid = {2144921}, + publisher = {Springer-Verlag}, + address = {Berlin, Heidelberg}, + url = {https://homes.cs.washington.edu/~mernst/pubs/classify-tests-ecoop2005.pdf} +} + +@inproceedings{Ammons2002, + author = {Ammons, Glenn and Bod\'{\i}k, Rastislav and Larus, James R.}, + title = {Mining Specifications}, + booktitle = {Proceedings of the 29th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages}, + series = {POPL '02}, + year = {2002}, + isbn = {1-58113-450-9}, + location = {Portland, Oregon}, + pages = {4--16}, + numpages = {13}, + url = {http://doi.acm.org/10.1145/503272.503275}, + doi = {10.1145/503272.503275}, + acmid = {503275}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@misc{lipton1971fault, + title={Fault diagnosis of computer programs}, + author={Lipton, Richard J}, + year={1971}, + publisher={Carnegie Mellon Univ., Tech.
Rep} +} + +@article{jia2011analysis, + title={An analysis and survey of the development of mutation testing}, + author={Jia, Yue and Harman, Mark}, + journal={IEEE transactions on software engineering}, + volume={37}, + number={5}, + pages={649--678}, + year={2011}, + publisher={IEEE} +} + +@incollection{papadakis2019mutation, + title={Mutation testing advances: an analysis and survey}, + author={Papadakis, Mike and Kintis, Marinos and Zhang, Jie and Jia, Yue and Le Traon, Yves and Harman, Mark}, + booktitle={Advances in Computers}, + volume={112}, + pages={275--378}, + year={2019}, + publisher={Elsevier} +} + +@article{boehme2018species, + author={B{\"o}hme, Marcel}, + journal={ACM Transactions on Software Engineering and Methodology}, + title={{STADS}: Software Testing as Species Discovery}, + issue_date = {June 2018}, + volume = {27}, + number = {2}, + month = jun, + year = {2018}, + pages = {7:1--7:52}, + articleno = {7}, + numpages = {52}, + doi = {10.1145/3210309} +} + +@article{boehme2018greybox, +author={B{\"o}hme, Marcel and Pham, Van-Thuan and Roychoudhury, Abhik}, +journal={IEEE Transactions on Software Engineering}, +title={Coverage-based Greybox Fuzzing as {Markov} Chain}, +url={https://mboehme.github.io/paper/CCS16.pdf}, +year={2018}, +pages={1-18} +} + +@inproceedings{boehme2017greybox, + author = {B{\"o}hme, Marcel and Pham, Van-Thuan and Nguyen, Manh-Dung and Roychoudhury, Abhik}, + title = {Directed Greybox Fuzzing}, + booktitle = {Proceedings of the 24th ACM Conference on Computer and Communications Security}, + series = {CCS}, + year = {2017}, + pages = {1-16}, + url = {https://mboehme.github.io/paper/CCS17.pdf}, + numpages = {16} +} + +@article{boehme2016efficiency, +author={B{\"o}hme, Marcel and Paul, Soumya}, +journal={IEEE Transactions on Software Engineering}, +title={A Probabilistic Analysis of the Efficiency of Automated Software Testing}, +year={2016}, +volume={42}, +number={4}, +pages={345-360}, +keywords={Efficient Testing;Error-based Partitioning;Partition Testing;Random Testing;Testing Theory}, +doi={10.1109/TSE.2015.2487274}, +ISSN={0098-5589}, +month={April}, +url={https://mboehme.github.io/paper/TSE15.pdf} +} + +@techreport{Pham2018aflsmart, + title={Smart Greybox Fuzzing}, + institution={National University of Singapore, Singapore and Monash University, Australia and University Politehnica of Bucharest, Romania}, + author={Van-Thuan Pham and Marcel B{\"o}hme and Andrew E. 
Santosa and Alexandru R\u{a}zvan C\u{a}ciulescu and Abhik Roychoudhury}, + journal={arXiv preprint arXiv:1811.09447}, + url={https://arxiv.org/abs/1811.09447}, + month=nov, + year={2018} +} + +@inproceedings{Wang2019superion, + title={Superion: Grammar-Aware Greybox Fuzzing}, + author={Junjie Wang and Bihuan Chen and Lei Wei and Yang Liu}, + booktitle = {Proceedings of ICSE 2019}, + year = {2019}, + url = {https://2019.icse-conferences.org/event/icse-2019-technical-papers-superion-grammar-aware-greybox-fuzzing} +} + +@inproceedings{Aschermann2019nautilus, + title={{NAUTILUS:} Fishing for Deep Bugs with Grammars}, + author={Cornelius Aschermann and Tommaso Frassetto and Thorsten Holz and Patrick Jauernig and Ahmad-Reza Sadeghi and Daniel Teuchert}, + booktitle = {Proceedings of NDSS 2019}, + year = {2019}, + url = {https://www.ndss-symposium.org/ndss-paper/nautilus-fishing-for-deep-bugs-with-grammars/} +} + +@inproceedings{Godefroid2017, + author = {Godefroid, Patrice and Peleg, Hila and Singh, Rishabh}, + title = {{Learn\&{}Fuzz}: Machine Learning for Input Fuzzing}, + booktitle = {Proceedings of the 32nd IEEE/ACM International Conference on Automated Software Engineering}, + series = {ASE 2017}, + year = {2017}, + isbn = {978-1-5386-2684-9}, + location = {Urbana-Champaign, IL, USA}, + pages = {50--59}, + numpages = {10}, + url = {http://dl.acm.org/citation.cfm?id=3155562.3155573}, + acmid = {3155573}, + publisher = {IEEE Press}, + address = {Piscataway, NJ, USA}, + keywords = {deep learning, fuzzing, grammar learning, grammar-based fuzzing}, +} + +@inproceedings{Sun2018, + author = {Sun, Chengnian and Li, Yuanbo and Zhang, Qirun and Gu, Tianxiao and Su, Zhendong}, + title = {Perses: Syntax-guided Program Reduction}, + booktitle = {Proceedings of the 40th International Conference on Software Engineering}, + series = {ICSE '18}, + year = {2018}, + isbn = {978-1-4503-5638-1}, + location = {Gothenburg, Sweden}, + pages = {361--371}, + numpages = {11}, + url = {http://doi.acm.org/10.1145/3180155.3180236}, + doi = {10.1145/3180155.3180236}, + acmid = {3180236}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {debugging, delta debugging, program reduction}, +} + +@book{Aniche2020, + title={Software Testing: From Theory to Practice}, + author={Maur{\'i}cio Aniche and Arie van Deursen}, + year={2020}, + url={https://sttp.site}, +} + +@inproceedings{z3, +author = {De Moura, Leonardo and Bj\o{}rner, Nikolaj}, +title = {{Z3}: An Efficient {SMT} Solver}, +year = {2008}, +isbn = {3540787992}, +publisher = {Springer-Verlag}, +address = {Berlin, Heidelberg}, +abstract = {Satisfiability Modulo Theories (SMT) problem is a decision problem for logical first order formulas with respect to combinations of background theories such as: arithmetic, bit-vectors, arrays, and uninterpreted functions.
Z3 is a new and efficient SMT Solver freely available from Microsoft Research. It is used in various software verification and analysis applications.}, +booktitle = {Proceedings of the Theory and Practice of Software, 14th International Conference on Tools and Algorithms for the Construction and Analysis of Systems}, +pages = {337--340}, +numpages = {4}, +location = {Budapest, Hungary}, +series = {TACAS'08/ETAPS'08}, +url={https://link.springer.com/chapter/10.1007/978-3-540-78800-3_24}, +} + +@book{zeller2009-why-programs-fail, + author = {Andreas Zeller}, + title = {Why Programs Fail - {A} Guide to Systematic Debugging, 2nd Edition}, + publisher = {Morgan Kaufmann}, + year = {2009}, + url = {http://www.whyprogramsfail.com/}, + isbn = {978-0-12-374515-6}, + timestamp = {Mon, 06 Feb 2017 15:25:22 +0100}, + biburl = {https://dblp.org/rec/books/daglib/0039904.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@book{spinellis2016-effective-debugging, + author = {Diomidis Spinellis}, + title = {Effective Debugging: 66 Specific Ways to Debug Software and Systems}, + publisher = {Addison-Wesley Professional}, + year = {2016}, + url = {https://www.spinellis.gr/debugging/} +} + +@book{agans2006-debugging, +author = {Agans, David J.}, +title = {Debugging: The 9 Indispensable Rules for Finding Even the Most Elusive Software and Hardware Problems}, +year = {2002}, +isbn = {0814471684}, +publisher = {American Management Assoc., Inc.}, +address = {USA}, +abstract = {From the Publisher: When the pressure is on to root out an elusive software or hardware glitch, what's needed is a cool head courtesy of a set of rules guaranteed to work on any system, in any circumstance. Written in a frank but engaging style, Debugging provides simple, foolproof principles guaranteed to help find any bug quickly. This book makes those shelves of application-specific debugging books (on C++, Perl, Java, etc.) obsolete. It changes the way readers think about debugging, making those pesky problems suddenly much easier to find and fix. Illustrating the rules with real-life bug-detection war stories, the book shows readers how to: Understand the system: how perceiving the "roadmap" can hasten your journey Quit thinking and look: when hands-on investigation can't be avoided Isolate critical factors: why changing one element at a time can be an essential tool Keep an audit trail: how keeping a record of the debugging process can win the day Author Biography: David J. Agans (Milford, NH) is a recognized expert called in to help with tough debugging problems. He currently runs PointSource, a computer systems consultancy. He has worked with industrial control and monitoring systems, integrated circuit design, handheld PCs, videoconferencing, and countless other systems.}, +url = {https://dl.acm.org/doi/book/10.5555/555103} +} + +@article{Abreu2009, +author = {Abreu, Rui and Zoeteweij, Peter and Golsteijn, Rob and van Gemund, Arjan J. C.}, +title = {A Practical Evaluation of Spectrum-Based Fault Localization}, +year = {2009}, +issue_date = {November, 2009}, +publisher = {Elsevier Science Inc.}, +address = {USA}, +volume = {82}, +number = {11}, +issn = {0164-1212}, +url = {https://doi.org/10.1016/j.jss.2009.06.035}, +doi = {10.1016/j.jss.2009.06.035}, +abstract = {Spectrum-based fault localization (SFL) shortens the test-diagnose-repair cycle by reducing the debugging effort. As a light-weight automated diagnosis technique it can easily be integrated with existing testing schemes.
Since SFL is based on discovering statistical coincidences between system failures and the activity of the different parts of a system, its diagnostic accuracy is inherently limited. Using a common benchmark consisting of the Siemens set and the space program, we investigate this diagnostic accuracy as a function of several parameters (such as quality and quantity of the program spectra collected during the execution of the system), some of which directly relate to test design. Our results indicate that the superior performance of a particular similarity coefficient, used to analyze the program spectra, is largely independent of test design. Furthermore, near-optimal diagnostic accuracy (exonerating over 80% of the blocks of code on average) is already obtained for low-quality error observations and limited numbers of test cases. In addition to establishing these results in the controlled environment of our benchmark set, we show that SFL can effectively be applied in the context of embedded software development in an industrial environment.}, +journal = {J. Syst. Softw.}, +month = nov, +pages = {1780--1792}, +numpages = {13}, +keywords = {Real-time and embedded systems, Software fault diagnosis, Program spectra, Test data analysis, Consumer electronics} +} + +@inproceedings{Jones2002, +author = {Jones, James A. and Harrold, Mary Jean and Stasko, John}, +title = {Visualization of Test Information to Assist Fault Localization}, +year = {2002}, +isbn = {158113472X}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/581339.581397}, +doi = {10.1145/581339.581397}, +abstract = {One of the most expensive and time-consuming components of the debugging process is locating the errors or faults. To locate faults, developers must identify statements involved in failures and select suspicious statements that might contain faults. This paper presents a new technique that uses visualization to assist with these tasks. The technique uses color to visually map the participation of each program statement in the outcome of the execution of the program with a test suite, consisting of both passed and failed test cases. Based on this visual mapping, a user can inspect the statements in the program, identify statements involved in failures, and locate potentially faulty statements. The paper also describes a prototype tool that implements our technique along with a set of empirical studies that use the tool for evaluation of the technique. The empirical studies show that, for the subject we studied, the technique can be effective in helping a user locate faults in a program.}, +booktitle = {Proceedings of the 24th International Conference on Software Engineering}, +pages = {467--477}, +numpages = {11}, +location = {Orlando, Florida}, +series = {ICSE '02} +} + +@article{daSilvaMeyer2004, + title = {Comparison of similarity coefficients used for cluster analysis with dominant markers in maize ({Zea mays L})}, + journal = {Genetics and Molecular Biology}, + author={da Silva Meyer, Andr\'eia and Garcia, Antonio Augusto Franco and de Souza, Anete Pereira and de Souza Jr., Cl\'audio Lopes}, + ISSN = {1415-4757}, + url = {https://doi.org/10.1590/S1415-47572004000100014}, + doi = {10.1590/S1415-47572004000100014}, + volume = {27}, + year = {2004}, + pages = {83--91}, + publisher = {SciELO} +} + +@article{Wong2016, +author = {Wong, W.
Eric and Gao, Ruizhi and Li, Yihao and Abreu, Rui and Wotawa, Franz}, +title = {A Survey on Software Fault Localization}, +year = {2016}, +issue_date = {August 2016}, +publisher = {IEEE Press}, +volume = {42}, +number = {8}, +issn = {0098-5589}, +url = {https://doi.org/10.1109/TSE.2016.2521368}, +doi = {10.1109/TSE.2016.2521368}, +abstract = {Software fault localization, the act of identifying the locations of faults in a program, is widely recognized to be one of the most tedious, time consuming, and expensive---yet equally critical---activities in program debugging. Due to the increasing scale and complexity of software today, manually locating faults when failures occur is rapidly becoming infeasible, and consequently, there is a strong demand for techniques that can guide software developers to the locations of faults in a program with minimal human intervention. This demand in turn has fueled the proposal and development of a broad spectrum of fault localization techniques, each of which aims to streamline the fault localization process and make it more effective by attacking the problem in a unique way. In this article, we catalog and provide a comprehensive overview of such techniques and discuss key issues and concerns that are pertinent to software fault localization as a whole.}, +journal = {IEEE Trans. Softw. Eng.}, +month = aug, +pages = {707--740}, +numpages = {34} +} + +@inproceedings{Parnin2011, +author = {Parnin, Chris and Orso, Alessandro}, +title = {Are Automated Debugging Techniques Actually Helping Programmers?}, +year = {2011}, +isbn = {9781450305624}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/2001420.2001445}, +doi = {10.1145/2001420.2001445}, +abstract = {Debugging is notoriously difficult and extremely time consuming. Researchers have therefore invested a considerable amount of effort in developing automated techniques and tools for supporting various debugging tasks. Although potentially useful, most of these techniques have yet to demonstrate their practical effectiveness. One common limitation of existing approaches, for instance, is their reliance on a set of strong assumptions on how developers behave when debugging (e.g., the fact that examining a faulty statement in isolation is enough for a developer to understand and fix the corresponding bug). In more general terms, most existing techniques just focus on selecting subsets of potentially faulty statements and ranking them according to some criterion. By doing so, they ignore the fact that understanding the root cause of a failure typically involves complex activities, such as navigating program dependencies and rerunning the program with different inputs. The overall goal of this research is to investigate how developers use and benefit from automated debugging tools through a set of human studies. As a first step in this direction, we perform a preliminary study on a set of developers by providing them with an automated debugging tool and two tasks to be performed with and without the tool. Our results provide initial evidence that several assumptions made by automated debugging techniques do not hold in practice. 
Through an analysis of the results, we also provide insights on potential directions for future work in the area of automated debugging.}, +booktitle = {Proceedings of the 2011 International Symposium on Software Testing and Analysis}, +pages = {199--209}, +numpages = {11}, +keywords = {statistical debugging, user studies}, +location = {Toronto, Ontario, Canada}, +series = {ISSTA '11} +} + +@article{Ochiai1957, + title={Zoogeographical Studies on the Soleoid Fishes Found in Japan and its Neighbouring Regions-III}, + author={Akira Ochiai}, + journal={Nippon Suisan Gakkaishi}, + year={1957}, + url={https://www.jstage.jst.go.jp/article/suisan1932/22/9/22_9_522/_article/-char/ja/}, + volume={22}, + pages={522--525} +} + +@inproceedings{Kirschner2020, +author = {Kirschner, Lukas and Soremekun, Ezekiel and Zeller, Andreas}, +title = {Debugging Inputs}, +year = {2020}, +isbn = {9781450371223}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://publications.cispa.saarland/3060/}, +XXXdoi = {10.1145/3377812.3390797}, +abstract = {Program failures are often caused by invalid inputs, for instance due to input corruption. To obtain the passing input, one needs to debug the data. In this paper we present a generic technique called ddmax that (1) identifies which parts of the input data prevent processing, and (2) recovers as much of the (valuable) input data as possible. To the best of our knowledge, ddmax is the first approach that fixes faults in the input data without requiring program analysis. In our evaluation, ddmax repaired about 69% of input files and recovered about 78% of data within one minute per input.}, +booktitle = {Proceedings of the ACM/IEEE 42nd International Conference on Software Engineering: Companion Proceedings}, +pages = {300--301}, +numpages = {2}, +location = {Seoul, South Korea}, +series = {ICSE '20} +} + +@inproceedings{Ness1997, +author = {Ness, Brian and Ngo, Viet}, +title = {Regression Containment through Source Change Isolation}, +year = {1997}, +isbn = {0818681055}, +publisher = {IEEE Computer Society}, +url={https://www.computer.org/csdl/proceedings-article/compsac/1997/81050616/12OmNANBZnS}, +address = {USA}, +abstract = {Effective regression containment is an important factor in the design of development and testing processes for large software projects, especially when many developers are doing concurrent work on a common set of sources. Source change isolation provides an inexpensive, mechanical alternative to analytical methods for identifying the cause of software regressions. It also provides the advantage of enabling regressions to be eliminated by reversing the effect of source changes that introduced errant behavior, without the need to write new code, and without halting other development work on the same software. Deliverability is also improved.}, +booktitle = {Proceedings of the 21st International Computer Software and Applications Conference}, +pages = {616}, +numpages = {1}, +series = {COMPSAC '97} +} + +@inproceedings{zheng2003, +author = {Zheng, Alice X. and Jordan, Michael I. and Liblit, Ben and Aiken, Alex}, +title = {Statistical Debugging of Sampled Programs}, +year = {2003}, +publisher = {MIT Press}, +address = {Cambridge, MA, USA}, +abstract = {We present a novel strategy for automatically debugging programs given sampled data from thousands of actual user runs. Our goal is to pinpoint those features that are most correlated with crashes. 
This is accomplished by maximizing an appropriately defined utility function. It has analogies with intuitive debugging heuristics, and, as we demonstrate, is able to deal with various types of bugs that occur in real programs.}, +booktitle = {Proceedings of the 16th International Conference on Neural Information Processing Systems}, +pages = {603--610}, +numpages = {8}, +location = {Whistler, British Columbia, Canada}, +series = {NIPS'03} +} + +@inproceedings{Liblit2003, +author = {Liblit, Ben and Aiken, Alex and Zheng, Alice X. and Jordan, Michael I.}, +title = {Bug Isolation via Remote Program Sampling}, +year = {2003}, +isbn = {1581136625}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/781131.781148}, +doi = {10.1145/781131.781148}, +abstract = {We propose a low-overhead sampling infrastructure for gathering information from the executions experienced by a program's user community. Several example applications illustrate ways to use sampled instrumentation to isolate bugs. Assertion-dense code can be transformed to share the cost of assertions among many users. Lacking assertions, broad guesses can be made about predicates that predict program errors and a process of elimination used to whittle these down to the true bug. Finally, even for non-deterministic bugs such as memory corruption, statistical modeling based on logistic regression allows us to identify program behaviors that are strongly correlated with failure and are therefore likely places to look for the error.}, +booktitle = {Proceedings of the ACM SIGPLAN 2003 Conference on Programming Language Design and Implementation}, +pages = {141--154}, +numpages = {14}, +keywords = {statistical debugging, bug isolation, random sampling, logistic regression, assertions, feature selection}, +location = {San Diego, California, USA}, +series = {PLDI '03} +} + +@inproceedings{10.5555/318773.318946, +author = {Zeller, Andreas}, +title = {Yesterday, My Program Worked. Today, It Does Not. Why?}, +year = {1999}, +isbn = {3540665382}, +publisher = {Springer-Verlag}, +address = {Berlin, Heidelberg}, +abstract = {Imagine some program and a number of changes. If none of these changes is applied (“yesterday”), the program works. If all changes are applied (“today”), the program does not work. Which change is responsible for the failure? We present an efficient algorithm that determines the minimal set of failure-inducing changes. Our delta debugging prototype tracked down a single failure-inducing change from 178,000 changed GDB lines within a few hours.}, +booktitle = {Proceedings of the 7th European Software Engineering Conference Held Jointly with the 7th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, +pages = {253--267}, +numpages = {15}, +location = {Toulouse, France}, +series = {ESEC/FSE-7} +} + +@article{Zeller1999, +author = {Zeller, Andreas}, +title = {Yesterday, My Program Worked. Today, It Does Not. Why?}, +year = {1999}, +issue_date = {Nov. 1999}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {24}, +number = {6}, +issn = {0163-5948}, +url = {https://doi.org/10.1145/318774.318946}, +doi = {10.1145/318774.318946}, +abstract = {Imagine some program and a number of changes. If none of these changes is applied (“yesterday”), the program works. If all changes are applied (“today”), the program does not work. Which change is responsible for the failure?
We present an efficient algorithm that determines the minimal set of failure-inducing changes. Our delta debugging prototype tracked down a single failure-inducing change from 178,000 changed GDB lines within a few hours.}, +journal = {SIGSOFT Softw. Eng. Notes}, +month = oct, +pages = {253--267}, +numpages = {15} +} + +@inproceedings{Chen2014, + author={Z. Chen and L. Chen and Y. Zhou and Z. Xu and W. C. Chu and B. Xu}, + booktitle={2014 IEEE 38th Annual Computer Software and Applications Conference}, + title={Dynamic Slicing of Python Programs}, + year={2014}, + volume={}, + number={}, + pages={219-228}, + doi={10.1109/COMPSAC.2014.30} +} + +@article{Weiser1982, +author = {Weiser, Mark}, +title = {Programmers Use Slices When Debugging}, +year = {1982}, +issue_date = {July 1982}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {25}, +number = {7}, +issn = {0001-0782}, +url = {https://doi.org/10.1145/358557.358577}, +doi = {10.1145/358557.358577}, +abstract = {Computer programmers break apart large programs into smaller coherent pieces. Each of these pieces: functions, subroutines, modules, or abstract datatypes, is usually a contiguous piece of program text. The experiment reported here shows that programmers also routinely break programs into one kind of coherent piece which is not contiguous. When debugging unfamiliar programs programmers use program pieces called slices which are sets of statements related by their flow of data. The statements in a slice are not necessarily textually contiguous, but may be scattered through a program.}, +journal = {Commun. ACM}, +month = jul, +pages = {446–452}, +numpages = {7}, +keywords = {slice, program decomposition} +} + +@inproceedings{10Weiser1981, +author = {Weiser, Mark}, +title = {Program Slicing}, +year = {1981}, +isbn = {0897911466}, +publisher = {IEEE Press}, +abstract = {Program slicing is a method used by experienced computer programmers for abstracting from programs. Starting from a subset of a program's behavior, slicing reduces that program to a minimal form which still produces that behavior. The reduced program, called a “slice”, is an independent program guaranteed to faithfully represent the original program within the domain of the specified subset of behavior. Finding a slice is in general unsolvable. A dataflow algorithm is presented for approximating slices when the behavior subset is specified as the values of a set of variables at a statement. Experimental evidence is presented that these slices are used by programmers during debugging. Experience with two automatic slicing tools is summarized. New measures of program complexity are suggested based on the organization of a program's slices.}, +booktitle = {Proceedings of the 5th International Conference on Software Engineering}, +pages = {439–449}, +numpages = {11}, +keywords = {Human factors, Data flow analysis, Program metrics, Program maintenance, Debugging, Software tools}, +location = {San Diego, California, USA}, +series = {ICSE '81} +} + +@article{Tip1995, + title={A survey of program slicing techniques}, + author={Tip, Frank}, + journal={Journal of programming languages}, + volume={3}, + number={3}, + pages={121--189}, + url={https://www.franktip.org/pubs/jpl1995.pdf}, + year={1995} +} + +@article{Korel1988, +author = {Korel, B. 
and Laski, J.}, +title = {Dynamic Program Slicing}, +year = {1988}, +issue_date = {October 26, 1988}, +publisher = {Elsevier North-Holland, Inc.}, +address = {USA}, +volume = {29}, +number = {3}, +issn = {0020-0190}, +url = {https://doi.org/10.1016/0020-0190(88)90054-3}, +doi = {10.1016/0020-0190(88)90054-3}, +journal = {Inf. Process. Lett.}, +month = oct, +pages = {155–163}, +numpages = {9} +} + +@inproceedings{Agrawal1990, +author = {Agrawal, Hiralal and Horgan, Joseph R.}, +title = {Dynamic Program Slicing}, +year = {1990}, +isbn = {0897913647}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/93542.93576}, +doi = {10.1145/93542.93576}, +abstract = {Program slices are useful in debugging, testing, maintenance, and understanding of programs. The conventional notion of a program slice, the static slice, is the set of all statements that might affect the value of a given variable occurrence. In this paper, we investigate the concept of the dynamic slice consisting of all statements that actually affect the value of a variable occurrence for a given program input. The sensitivity of dynamic slicing to particular program inputs makes it more useful in program debugging and testing than static slicing. Several approaches for computing dynamic slices are examined. The notion of a Dynamic Dependence Graph and its use in computing dynamic slices is discussed. The Dynamic Dependence Graph may be unbounded in length; therefore, we introduce the economical concept of a Reduced Dynamic Dependence Graph, which is proportional in size to the number of dynamic slices arising during the program execution.}, +booktitle = {Proceedings of the ACM SIGPLAN 1990 Conference on Programming Language Design and Implementation}, +pages = {246–256}, +numpages = {11}, +location = {White Plains, New York, USA}, +series = {PLDI '90} +} + +@inproceedings{Ko2004, +author = {Ko, Andrew J. and Myers, Brad A.}, +title = {Designing the Whyline: A Debugging Interface for Asking Questions about Program Behavior}, +year = {2004}, +isbn = {1581137028}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/985692.985712}, +doi = {10.1145/985692.985712}, +abstract = {Debugging is still among the most common and costly of programming activities. One reason is that current debugging tools do not directly support the inquisitive nature of the activity. Interrogative Debugging is a new debugging paradigm in which programmers can ask why did and even why didn't questions directly about their program's runtime failures. The Whyline is a prototype Interrogative Debugging interface for the Alice programming environment that visualizes answers in terms of runtime events directly relevant to a programmer's question. 
Comparisons of identical debugging scenarios from user tests with and without the Whyline showed that the Whyline reduced debugging time by nearly a factor of 8, and helped programmers complete 40% more tasks.}, +booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems}, +pages = {151–158}, +numpages = {8}, +keywords = {debugging, program slicing, Alice}, +location = {Vienna, Austria}, +series = {CHI '04} +} + +@article{Soremekun2021, +title = {Locating Faults with Program Slicing: An Empirical Analysis}, +author = {Ezekiel Soremekun and Lukas Kirschner and Marcel B{\"o}hme and Andreas Zeller}, +journal = {Empirical Software Engineering}, +year = {2021}, +url = {https://figshare.com/articles/conference_contribution/Locating_Faults_with_Program_Slicing_-_An_Empirical_Analysis_-_Replication_Package/13369400/1} +} + +@ARTICLE{LeGoues2012, + author={C. {Le Goues} and T. {Nguyen} and S. {Forrest} and W. {Weimer}}, + journal={IEEE Transactions on Software Engineering}, + title={GenProg: A Generic Method for Automatic Software Repair}, + year={2012}, + volume={38}, + number={1}, + pages={54--72}, + doi={10.1109/TSE.2011.104}, + url={https://ieeexplore.ieee.org/document/6035728} +} + +@article{Pei2014, + author={Y. {Pei} and C. A. {Furia} and M. {Nordio} and Y. {Wei} and B. {Meyer} and A. {Zeller}}, + journal={IEEE Transactions on Software Engineering}, + title={Automated Fixing of Programs with Contracts}, + year={2014}, + volume={40}, + number={5}, + pages={427--449}, + doi={10.1109/TSE.2014.2312918}, + url={https://ieeexplore.ieee.org/document/6776507} +} + +@inproceedings{Nguyen2013, +author = {Nguyen, Hoang Duong Thien and Qi, Dawei and Roychoudhury, Abhik and Chandra, Satish}, +title = {SemFix: Program Repair via Semantic Analysis}, +year = {2013}, +isbn = {9781467330763}, +publisher = {IEEE Press}, +abstract = {Debugging consumes significant time and effort in any major software development project. Moreover, even after the root cause of a bug is identified, fixing the bug is non-trivial. Given this situation, automated program repair methods are of value. In this paper, we present an automated repair method based on symbolic execution, constraint solving and program synthesis. In our approach, the requirement on the repaired code to pass a given set of tests is formulated as a constraint. Such a constraint is then solved by iterating over a layered space of repair expressions, layered by the complexity of the repair code. We compare our method with recently proposed genetic programming based repair on SIR programs with seeded bugs, as well as fragments of GNU Coreutils with real bugs. On these subjects, our approach reports a higher success-rate than genetic programming based repair, and produces a repair faster.}, +booktitle = {Proceedings of the 2013 International Conference on Software Engineering}, +pages = {772--781}, +numpages = {10}, +location = {San Francisco, CA, USA}, +url = {https://dl.acm.org/doi/10.5555/2486788.2486890}, +series = {ICSE '13} +} + +@inproceedings{Kalhauge2019, +author = {Kalhauge, Christian Gram and Palsberg, Jens}, +title = {Binary Reduction of Dependency Graphs}, +year = {2019}, +isbn = {9781450355728}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3338906.3338956}, +doi = {10.1145/3338906.3338956}, +abstract = {Delta debugging is a technique for reducing a failure-inducing input to a small input that reveals the cause of the failure. 
This has been successful for a wide variety of inputs including C programs, XML data, and thread schedules. However, for input that has many internal dependencies, delta debugging scales poorly. Such input includes C#, Java, and Java bytecode and they have presented a major challenge for input reduction until now. In this paper, we show that the core challenge is a reduction problem for dependency graphs, and we present a general strategy for reducing such graphs. We combine this with a novel algorithm for reduction called Binary Reduction in a tool called J-Reduce for Java bytecode. Our experiments show that our tool is 12x faster and achieves more reduction than delta debugging on average. This enabled us to create and submit short bug reports for three Java bytecode decompilers.}, +booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, +pages = {556--566}, +numpages = {11}, +keywords = {dependencies, Debugging, reduction}, +location = {Tallinn, Estonia}, +series = {ESEC/FSE 2019} +} + +@inproceedings{Gopinath2020, +author = {Gopinath, Rahul and Mathis, Bj\"{o}rn and Zeller, Andreas}, +title = {Mining Input Grammars from Dynamic Control Flow}, +year = {2020}, +isbn = {9781450370431}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3368089.3409679}, +abstract = {One of the key properties of a program is its input specification. Having a formal input specification can be critical in fields such as vulnerability analysis, reverse engineering, software testing, clone detection, or refactoring. Unfortunately, accurate input specifications for typical programs are often unavailable or out of date. In this paper, we present a general algorithm that takes a program and a small set of sample inputs and automatically infers a readable context-free grammar capturing the input language of the program. We infer the syntactic input structure only by observing access of input characters at different locations of the input parser. This works on all stack based recursive descent input parsers, including parser combinators, and works entirely without program specific heuristics. Our Mimid prototype produced accurate and readable grammars for a variety of evaluation subjects, including complex languages such as JSON, TinyC, and JavaScript.}, +booktitle = {Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, +pages = {172–183}, +numpages = {12} +} + +@inproceedings{Bettenburg2008, +author = {Bettenburg, Nicolas and Just, Sascha and Schr\"{o}ter, Adrian and Weiss, Cathrin and Premraj, Rahul and Zimmermann, Thomas}, +title = {What Makes a Good Bug Report?}, +year = {2008}, +isbn = {9781595939951}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/1453101.1453146}, +doi = {10.1145/1453101.1453146}, +abstract = {In software development, bug reports provide crucial information to developers. However, these reports widely differ in their quality. We conducted a survey among developers and users of APACHE, ECLIPSE, and MOZILLA to find out what makes a good bug report.The analysis of the 466 responses revealed an information mismatch between what developers need and what users supply. 
Most developers consider steps to reproduce, stack traces, and test cases as helpful, which are at the same time most difficult to provide for users. Such insight is helpful to design new bug tracking tools that guide users at collecting and providing more helpful information.Our CUEZILLA prototype is such a tool and measures the quality of new bug reports; it also recommends which elements should be added to improve the quality. We trained CUEZILLA on a sample of 289 bug reports, rated by developers as part of the survey. In our experiments, CUEZILLA was able to predict the quality of 31--48% of bug reports accurately.}, +booktitle = {Proceedings of the 16th ACM SIGSOFT International Symposium on Foundations of Software Engineering}, +pages = {308–318}, +numpages = {11}, +location = {Atlanta, Georgia}, +series = {SIGSOFT '08/FSE-16} +} + +@inproceedings{Bertram2010, +author = {Bertram, Dane and Voida, Amy and Greenberg, Saul and Walker, Robert}, +title = {Communication, Collaboration, and Bugs: The Social Nature of Issue Tracking in Small, Collocated Teams}, +year = {2010}, +isbn = {9781605587950}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/1718918.1718972}, +doi = {10.1145/1718918.1718972}, +abstract = {Issue tracking systems help organizations manage issue reporting, assignment, tracking, resolution, and archiving. Traditionally, it is the Software Engineering community that researches issue tracking systems, where software defects are reported and tracked as 'bug reports' within an archival database. Yet, as issue tracking is fundamentally a social process, it is important to understand the design and use of issue tracking systems from that perspective. Consequently, we conducted a qualitative study of issue tracking systems as used by small, collocated software development teams. We found that an issue tracker is not just a database for tracking bugs, features, and inquiries, but also a focal point for communication and coordination for many stakeholders within and beyond the software team. Customers, project managers, quality assurance personnel, and programmers all contribute to the shared knowledge and persistent communication that exists within the issue tracking system. These results were all the more striking because in spite of teams being collocated--which afforded frequent, face-to-face communication--the issue tracker was still used as a fundamental communication channel. We articulate various real-world practices surrounding issue trackers and offer design considerations for future systems.}, +booktitle = {Proceedings of the 2010 ACM Conference on Computer Supported Cooperative Work}, +pages = {291–300}, +numpages = {10}, +keywords = {shared knowledge, software engineering, issue tracking}, +location = {Savannah, Georgia, USA}, +series = {CSCW '10} +} + +@inproceedings{Bissyande2013, + author={T. F. Bissyandé and D. Lo and L. Jiang and L. Réveillère and J. Klein and Y. L. Traon}, + booktitle={2013 IEEE 24th International Symposium on Software Reliability Engineering (ISSRE)}, + title={Got issues? Who cares about it? A large scale investigation of issue trackers from GitHub}, + year={2013}, + volume={}, + number={}, + pages={188-197}, + doi={10.1109/ISSRE.2013.6698918} +} + +@inproceedings{Herzig2013, + author={K. Herzig and S. Just and A. 
Zeller}, + booktitle={2013 35th International Conference on Software Engineering (ICSE)}, + title={It's not a bug, it's a feature: How misclassification impacts bug prediction}, + year={2013}, + volume={}, + number={}, + pages={392-401}, + doi={10.1109/ICSE.2013.6606585} +} + +@inproceedings{Anvik2006, +author = {Anvik, John and Hiew, Lyndon and Murphy, Gail C.}, +title = {Who Should Fix This Bug?}, +year = {2006}, +isbn = {1595933751}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/1134285.1134336}, +doi = {10.1145/1134285.1134336}, +abstract = {Open source development projects typically support an open bug repository to which both developers and users can report bugs. The reports that appear in this repository must be triaged to determine if the report is one which requires attention and if it is, which developer will be assigned the responsibility of resolving the report. Large open source developments are burdened by the rate at which new bug reports appear in the bug repository. In this paper, we present a semi-automated approach intended to ease one part of this process, the assignment of reports to a developer. Our approach applies a machine learning algorithm to the open bug repository to learn the kinds of reports each developer resolves. When a new report arrives, the classifier produced by the machine learning technique suggests a small number of developers suitable to resolve the report. With this approach, we have reached precision levels of 57% and 64% on the Eclipse and Firefox development projects respectively. We have also applied our approach to the gcc open source development with less positive results. We describe the conditions under which the approach is applicable and also report on the lessons we learned about applying machine learning to repositories used in open source development.}, +booktitle = {Proceedings of the 28th International Conference on Software Engineering}, +pages = {361–370}, +numpages = {10}, +keywords = {bug report assignment, problem tracking, bug triage, issue tracking, machine learning}, +location = {Shanghai, China}, +series = {ICSE '06} +} + +@article{Kim2013, + author={D. Kim and Y. Tao and S. Kim and A. Zeller}, + journal={IEEE Transactions on Software Engineering}, + title={Where Should We Fix This Bug? A Two-Phase Recommendation Model}, + year={2013}, + volume={39}, + number={11}, + pages={1597-1610}, + doi={10.1109/TSE.2013.24} +} + +@inproceedings{Wang2008, +author = {Wang, Xiaoyin and Zhang, Lu and Xie, Tao and Anvik, John and Sun, Jiasu}, +title = {An Approach to Detecting Duplicate Bug Reports Using Natural Language and Execution Information}, +year = {2008}, +isbn = {9781605580791}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/1368088.1368151}, +doi = {10.1145/1368088.1368151}, +abstract = {An open source project typically maintains an open bug repository so that bug reports from all over the world can be gathered. When a new bug report is submitted to the repository, a person, called a triager, examines whether it is a duplicate of an existing bug report. If it is, the triager marks it as DUPLICATE and the bug report is removed from consideration for further work. In the literature, there are approaches exploiting only natural language information to detect duplicate bug reports. In this paper we present a new approach that further involves execution information. 
In our approach, when a new bug report arrives, its natural language information and execution information are compared with those of the existing bug reports. Then, a small number of existing bug reports are suggested to the triager as the most similar bug reports to the new bug report. Finally, the triager examines the suggested bug reports to determine whether the new bug report duplicates an existing bug report. We calibrated our approach on a subset of the Eclipse bug repository and evaluated our approach on a subset of the Firefox bug repository. The experimental results show that our approach can detect 67%-93% of duplicate bug reports in the Firefox bug repository, compared to 43%-72% using natural language information alone.}, +booktitle = {Proceedings of the 30th International Conference on Software Engineering}, +pages = {461--470}, +numpages = {10}, +keywords = {execution information, duplicate bug report, information retrieval}, +location = {Leipzig, Germany}, +series = {ICSE '08} +} + +@inproceedings{Gopinath2020abstracting, +author = {Gopinath, Rahul and Kampmann, Alexander and Havrikov, Nikolas and Soremekun, Ezekiel O. and Zeller, Andreas}, +title = {Abstracting Failure-Inducing Inputs}, +year = {2020}, +isbn = {9781450380089}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3395363.3397349}, +doi = {10.1145/3395363.3397349}, +abstract = {A program fails. Under which circumstances does the failure occur? Starting with a single failure-inducing input ("The input ((4)) fails") and an input grammar, the DDSET algorithm uses systematic tests to automatically generalize the input to an abstract failure-inducing input that contains both (concrete) terminal symbols and (abstract) nonterminal symbols from the grammar—for instance, "(())", which represents any expression in double parentheses. Such an abstract failure-inducing input can be used (1) as a debugging diagnostic, characterizing the circumstances under which a failure occurs ("The error occurs whenever an expression is enclosed in double parentheses"); (2) as a producer of additional failure-inducing tests to help design and validate fixes and repair candidates ("The inputs ((1)), ((3 * 4)), and many more also fail"). In its evaluation on real-world bugs in JavaScript, Clojure, Lua, and UNIX command line utilities, DDSET’s abstract failure-inducing inputs provided to-the-point diagnostics, and precise producers for further failure inducing inputs.}, +booktitle = {Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis}, +pages = {237--248}, +numpages = {12}, +keywords = {error diagnosis, debugging, grammars, failure-inducing inputs}, +location = {Virtual Event, USA}, +series = {ISSTA 2020} +} + +@inproceedings{Kampmann2020, +author = {Kampmann, Alexander and Havrikov, Nikolas and Soremekun, Ezekiel O. and Zeller, Andreas}, +title = {When Does My Program Do This? Learning Circumstances of Software Behavior}, +year = {2020}, +isbn = {9781450370431}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3368089.3409687}, +abstract = {A program fails. Under which circumstances does the failure occur? Our Alhazen approach starts with a run that exhibits a particular behavior and automatically determines input features associated with the behavior in question: (1) We use a grammar to parse the input into individual elements.
(2) We use a decision tree learner to observe and learn which input elements are associated with the behavior in question. (3) We use the grammar to generate additional inputs to further strengthen or refute hypotheses as learned associations. (4) By repeating steps 2 and 3, we obtain a theory that explains and predicts the given behavior. In our evaluation using inputs for find, grep, NetHack, and a JavaScript transpiler, the theories produced by Alhazen predict and produce failures with high accuracy and allow developers to focus on a small set of input features: “grep fails whenever the --fixed-strings option is used in conjunction with an empty search string.”}, +booktitle = {Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, +pages = {1228–1239}, +numpages = {12} +} + +@inproceedings{Gopinath2021, + title = {Input Algebras}, + author = {Gopinath, Rahul and Nemati, Hamed and Zeller, Andreas}, + booktitle = {International Conference on Software Engineering (ICSE 2021)}, + year = {2021}, + url = {https://publications.cispa.saarland/3208/}, + note = {To Appear} +} + +@inproceedings{King2005, +author = {Samuel T. King and George W. Dunlap and Peter M. Chen}, +title = {Debugging Operating Systems with Time-Traveling Virtual Machines}, +booktitle = {2005 {USENIX} Annual Technical Conference ({USENIX} {ATC} 05)}, +year = {2005}, +address = {Anaheim, CA}, +url = {https://www.usenix.org/conference/2005-usenix-annual-technical-conference/debugging-operating-systems-time-traveling}, +publisher = {{USENIX} Association}, +month = apr, +} + +@inproceedings{Glerum2009, +author = {Glerum, Kirk and Kinshumann, Kinshuman and Greenberg, Steve and Aul, Gabriel and Orgovan, Vince and Nichols, Greg and Grant, David and Loihle, Gretchen and Hunt, Galen}, +title = {Debugging in the (Very) Large: Ten Years of Implementation and Experience}, +year = {2009}, +isbn = {9781605587523}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/1629575.1629586}, +doi = {10.1145/1629575.1629586}, +abstract = {Windows Error Reporting (WER) is a distributed system that automates the processing of error reports coming from an installed base of a billion machines. WER has collected billions of error reports in ten years of operation. It collects error data automatically and classifies errors into buckets, which are used to prioritize developer effort and report fixes to users. WER uses a progressive approach to data collection, which minimizes overhead for most reports yet allows developers to collect detailed information when needed. WER takes advantage of its scale to use error statistics as a tool in debugging; this allows developers to isolate bugs that could not be found at smaller scale. 
WER has been designed for large scale: one pair of database servers can record all the errors that occur on all Windows computers worldwide.}, +booktitle = {Proceedings of the ACM SIGOPS 22nd Symposium on Operating Systems Principles}, +pages = {103–116}, +numpages = {14}, +keywords = {classifying, statistics-based debugging., error reports, blue screen of death, minidump, bucketing, labeling}, +location = {Big Sky, Montana, USA}, +series = {SOSP '09} +} diff --git a/notebooks/shared/gitignore b/notebooks/shared/gitignore new file mode 100644 index 000000000..59a403aa3 --- /dev/null +++ b/notebooks/shared/gitignore @@ -0,0 +1,103 @@ +# Mac-specific files +.DS_Store + +# Tool-specific files +.ipynb_checkpoints/ +__pycache__/ + +# ipypublish sources +ipypublish +ipypublish-master + +# Chapter target files +html/*.html +html/*_files/ +beta/html/*.html +beta/html/*_files/ + +code/*.py +code/*.py.out +beta/code/*.py +beta/code/*.py.out + +mypy/*.py + +slides/*.html +slides/*_files/ +beta/slides/*.html +beta/slides/*_files/ + +pdf/*.pdf +pdf/*.tex +pdf/*_files/ + +*.blg + +nbpdf/*.pdf + +word/*.docx +word/*_files/ + +epub/*.epub +epub/*_files/ + +markdown/*.md +markdown/*_files/ + +full_notebooks/*.ipynb +full_notebooks/*_utils/* +full_notebooks/bookutils/* + +rendered/*.ipynb + +.depend/*.makefile + +notebooks/.jupyterlab.pid + +# Derived pics +notebooks/PICS/Sitemap.svg + +# Temp files +book +*book +geckodriver.log +orders.db +FuzzManager +simply-buggy +**/my_project +import*.py +.ipynb_checkpoints + +# Docker-make output directory +build-output/ + +# LS +.virtual_documents +notebooks/notebooks + +# Logs +binder.log +*.py.out +.jupyter.log +.jupyterlab.log + +# Caches +.mypy_cache/ + +# .bib timestamps +.*.bib.* + +# Zeller's files +Security Testing +projects +papers +course +udacity +videos + +*.command +*.webloc +notebooks/CISPACourse.ipynb +notebooks/BugBoard.ipynb +notebooks/Todos.ipynb +notebooks/SimpleGrammarMiner.ipynb diff --git a/notebooks/shared/pycodestyle.cfg b/notebooks/shared/pycodestyle.cfg new file mode 100644 index 000000000..3073480bb --- /dev/null +++ b/notebooks/shared/pycodestyle.cfg @@ -0,0 +1,15 @@ +# Pycodestyle configuration options + +# Ignore these warnings and errors: +# - W291 trailing whitespace (generated) +# - E501 line too long (generated) +# - E402 module import not at top level (useful for examples) +# - W391 blank line at end of file (generated) +# - E703 statement ends in a semicolon (we need this for non-text output) +# - W503 line break before binary operator (we want this rather than after binary operators) +# - W504 line break after binary operator (frequently generated by autopep8) +# - E722 do not use bare 'except' (we use this to illustrate a problem) +# - E30* various blank line counts (generated) + +[pycodestyle] +ignore = W291,E501,E402,W391,E703,W503,W504,E722,E302,E303,E305 diff --git a/notebooks/shared/utils/README.md b/notebooks/shared/utils/README.md new file mode 100644 index 000000000..483bcd7d3 --- /dev/null +++ b/notebooks/shared/utils/README.md @@ -0,0 +1 @@ +This folder contains various utilities required for production. In particular, [post-html.py](post-html.py) creates the HTML files from notebooks. 
\ No newline at end of file diff --git a/notebooks/shared/utils/add_metadata.py b/notebooks/shared/utils/add_metadata.py new file mode 100755 index 000000000..610007b5b --- /dev/null +++ b/notebooks/shared/utils/add_metadata.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python +# Add proper metadata to fuzzingbook notebook + +""" +usage: + +python add-metadata.py [--project PROJECT] [--titlepage] A.ipynb > A'.ipynb +""" + +import io +import os +import sys +import re + +import nbformat + +def get_text_contents(notebook): + contents = "" + for cell in notebook.cells: + if cell.cell_type == 'markdown': + contents += "".join(cell.source) + "\n\n" + + # print("Contents of", notebook, ": ", repr(contents[:100])) + + return contents + + +def get_title(notebook): + """Return the title from a notebook file""" + contents = get_text_contents(notebook) + match = re.search(r'^# (.*)', contents, re.MULTILINE) + title = match.group(1).replace(r'\n', '') + # print("Title", title.encode('utf-8')) + return title + + + +def add_document_metadata(notebook, project, titlepage): + """Add document metadata""" + # No cell toolbar for published notebooks + if 'celltoolbar' in notebook.metadata: + del notebook.metadata['celltoolbar'] + + # Add bibliography + if 'ipub' not in notebook.metadata: + notebook.metadata['ipub'] = {} + if 'bibliography' not in notebook.metadata['ipub']: + notebook.metadata['ipub']['bibliography'] = 'fuzzingbook.bib' + + if titlepage: + # Add title + chapter_title = get_title(notebook) + if project == 'fuzzingbook': + notebook.metadata['ipub']['titlepage'] = { + "author": "Andreas Zeller, Rahul Gopinath, Marcel Böhme, Gordon Fraser, and Christian Holler", + "title": chapter_title, + "subtitle": 'A Chapter of "The Fuzzing Book"' + } + elif project == 'debuggingbook': + notebook.metadata['ipub']['titlepage'] = { + "author": "Andreas Zeller", + "title": chapter_title, + "subtitle": 'A Chapter of "The Debugging Book"' + } + + # Add table of contents + notebook.metadata['toc'] = { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": True, + "sideBar": True, + "skip_h1_title": True, + "title_cell": "", + "title_sidebar": "Contents", + "toc_cell": False, + "toc_position": {}, + "toc_section_display": True, + "toc_window_display": True + } + notebook.metadata["toc-autonumbering"] = False + +def add_solution_metadata(notebook): + """Add solution metadata""" + + within_solution = False + previous_cell = None + + for cell in notebook.cells: + if cell.cell_type == 'markdown': + text = cell.source + if text.startswith("**Solution"): + within_solution = True + previous_cell.metadata['solution2_first'] = True + previous_cell.metadata['solution2'] = 'hidden' + elif text.startswith("#"): + within_solution = False + + if within_solution: + cell.metadata['solution2'] = 'hidden' + if 'slideshow' not in cell.metadata: + cell.metadata['slideshow'] = {} + cell.metadata['slideshow']['slide_type'] = "skip" + + previous_cell = cell + + + +def add_metadata(filename, project, titlepage): + # Read in + with io.open(filename, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, as_version=4) + + add_document_metadata(notebook, project, titlepage) + add_solution_metadata(notebook) + + # Write out + # Include a newline at the end, as Jupyterlab does + notebook_content = nbformat.writes(notebook) + '\n' + sys.stdout.buffer.write(notebook_content.encode('utf-8')) + + + +if __name__ == '__main__': + args = sys.argv[1:] + + if args[0] == "--project": + project = args[1] + args = args[2:] + else: + project = 
'fuzzingbook'
+
+    if args[0] == "--titlepage":
+        titlepage = True
+        args = args[1:]
+    else:
+        titlepage = False
+
+    notebooks = args
+
+    if not notebooks:
+        print(__doc__, file=sys.stderr)
+        sys.exit(1)
+
+    for notebook in notebooks:
+        add_metadata(notebook, project, titlepage)
diff --git a/notebooks/shared/utils/bnfize b/notebooks/shared/utils/bnfize
new file mode 100755
index 000000000..7c7004cc4
--- /dev/null
+++ b/notebooks/shared/utils/bnfize
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# Replace '$FOO' by '<foo>'
+
+import re
+import fileinput
+import sys
+
+RE_SYMBOL = re.compile(r'(\$[a-zA-Z_]+[a-zA-Z_]*)')
+
+def bnfize(line):
+    while True:
+        m = re.search(RE_SYMBOL, line)
+        if m is None:
+            break
+
+        symbol = line[m.start():m.end()]
+        line = line[:m.start()] + "<" + symbol[1:].lower() + ">" + line[m.end():]
+
+    return line
+
+
+assert bnfize("1 + $FOO + 2") == "1 + <foo> + 2"
+assert bnfize("$FOO") == "<foo>"
+assert bnfize("1 + 2") == "1 + 2"
+
+for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
+    conversion = bnfize(line)
+    sys.stdout.buffer.write(conversion.encode("utf-8"))
\ No newline at end of file
diff --git a/notebooks/shared/utils/htmltonbpdf.py b/notebooks/shared/utils/htmltonbpdf.py
new file mode 100755
index 000000000..9d30e39d8
--- /dev/null
+++ b/notebooks/shared/utils/htmltonbpdf.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+# Convert HTML file into PDF with notebook attached
+"""
+usage:
+
+python htmltonbpdf.py /PATH/TO/X.html X.ipynb X.pdf
+"""
+
+import argparse
+import asyncio
+import tempfile
+import sys
+import os
+
+# From https://github.com/betatim/notebook-as-pdf
+
+import asyncio
+import json
+import os
+import tempfile
+import concurrent.futures
+import nbconvert
+
+from pyppeteer import launch
+from traitlets import default
+import pikepdf
+from nbconvert.exporters import Exporter
+
+import re
+
+RE_HTML_LINK = re.compile(r'href="([^/:]+).html"')
+
+def fix_html(contents):
+    # Let local HTML links point to (local?) PDF files instead
+    return RE_HTML_LINK.sub(r'href="\1.pdf"', contents)
+
+RE_PDF_LINK = re.compile(r'file://[^)]*/([^/]+).html')
+
+def fix_pdf(contents):
+    # Let local HTML links within the PDF point to (local?) PDF files instead
+    return RE_PDF_LINK.sub(r'href="file:/\1.pdf"', contents)
+
+
+async def html_to_pdf(html_file, pdf_file):
+    """Convert an HTML file to a PDF"""
+    browser = await launch(handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False)
+    page = await browser.newPage()
+    await page.setViewport(dict(width=640, height=640))
+
+    # We use 'print' as media type to avoid having the menu clutter things
+    await page.emulateMedia("print")
+
+    await page.goto(f"file:///{html_file}", {"waitUntil": ["networkidle2"]})
+
+    page_margins = {
+        "left": "0px",
+        "right": "0px",
+        "top": "0px",
+        "bottom": "0px",
+    }
+
+    dimensions = await page.evaluate(
+        """() => {
+        return {
+            width: document.body.scrollWidth,
+            height: document.body.scrollHeight,
+            offsetHeight: document.body.offsetHeight,
+            deviceScaleFactor: window.devicePixelRatio,
+        }
+    }"""
+    )
+    width = dimensions["width"]
+    height = dimensions["height"]
+
+    await page.addStyleTag(
+        {
+            "content": """
+                #notebook-container {
+                    box-shadow: none;
+                    padding: unset
+                }
+                div.cell {
+                    page-break-inside: avoid;
+                }
+                div.output_wrapper {
+                    page-break-inside: avoid;
+                }
+                div.output {
+                    page-break-inside: avoid;
+                }
+            """
+        }
+    )
+
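+    # A sketch of the arithmetic behind the height cap below (assumption:
+    # the height is interpreted as PDF points, 72 per inch):
+    #     200 inches * 72 points/inch = 14,400 units
+    # If Chromium instead reads bare numbers as CSS pixels (96 per inch),
+    # the cap is an even more conservative ~150 inches.
+    await page.pdf(
+        {
+            "path": pdf_file,
+            "width": width,
+            # Adobe cannot display pages longer than 200 inches.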
So we limit + # ourselves to that and start a new page if needed. + "height": min(height, 200 * 72), + "printBackground": True, + "margin": page_margins, + } + ) + + await browser.close() + + +def attach_notebook(pdf_in, pdf_out, notebook): + N = pikepdf.Name + + main_pdf = pikepdf.open(pdf_in) + + the_file = pikepdf.Stream(main_pdf, notebook["contents"]) + the_file[N("/Type")] = N("/EmbeddedFile") + + file_wrapper = pikepdf.Dictionary(F=the_file) + + fname = notebook["file_name"] + embedded_file = pikepdf.Dictionary( + Type=N("/Filespec"), UF=fname, F=fname, EF=file_wrapper + ) + + name_tree = pikepdf.Array([pikepdf.String(fname), embedded_file]) + + embedded_files = pikepdf.Dictionary(Names=name_tree) + + names = pikepdf.Dictionary(EmbeddedFiles=embedded_files) + + main_pdf.Root[N("/Names")] = names + + main_pdf.save(pdf_out) + + +async def notebook_to_pdf(notebook, pdf_path, config=None, resources=None, **kwargs): + """Convert a notebook to PDF""" + if config is None: + config = {} + exporter = nbconvert.HTMLExporter(config=config) + exported_html, _ = exporter.from_notebook_node( + notebook, resources=resources, **kwargs + ) + + with tempfile.NamedTemporaryFile(suffix=".html") as f: + f.write(exported_html.encode()) + f.flush() + await html_to_pdf(f.name, pdf_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--fix-html-links', action='store_true') + parser.add_argument('--fix-pdf-links', action='store_true') + parser.add_argument('--attach', action='store_true') + parser.add_argument("html", help="Notebook rendered as HTML") + parser.add_argument("notebook", help="Notebook source (to be attached)") + parser.add_argument("pdf", help="Notebook PDF output") + args = parser.parse_args() + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + html_contents = open(args.html, encoding='utf-8').read() + if args.fix_html_links: + html_contents = fix_html(html_contents) + + with tempfile.NamedTemporaryFile(suffix=".html", + dir=os.path.dirname(args.html)) as f_html: + f_html.write(html_contents.encode('utf-8')) + f_html.flush() + + with tempfile.NamedTemporaryFile(suffix=".pdf") as f_pdf: + loop.run_until_complete(html_to_pdf(f_html.name, f_pdf.name)) + + if args.fix_pdf_links: + # Note: This currently does not work; it garbles PDF content + pdf_contents = open(f_pdf.name, encoding='latin-1').read() + pdf_contents = fix_pdf(pdf_contents) + open(f_pdf.name, "wb").write(pdf_contents.encode('latin-1')) + + if args.attach: + notebook = {} + notebook["file_name"] = args.notebook + notebook["contents"] = open(args.notebook, "rb").read() + + attach_notebook(f_pdf.name, args.pdf, notebook) + else: + # Simply copy + open(args.pdf, "wb").write(open(f_pdf.name, "rb").read()) diff --git a/notebooks/shared/utils/nbautopep8.py b/notebooks/shared/utils/nbautopep8.py new file mode 100755 index 000000000..a1c259bd3 --- /dev/null +++ b/notebooks/shared/utils/nbautopep8.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# Apply autopep8 on code cells of given notebook +""" +usage: + +python nbautopep8.py [autopep8 options] notebooks... 
+""" + +import io, os, sys, types, re + +import nbformat +import autopep8 + +# If True, split cells that contain more than one def/use +split_cells = False + +def prefix_code(code, prefix): + return prefix + code.replace('\n', '\n' + prefix) + +def print_utf8(s): + sys.stdout.buffer.write(s.encode('utf-8')) + +def autopep8_notebook(job_args): + notebook_path, options = job_args + + # load the notebook + if notebook_path == '-': + notebook = nbformat.read(sys.stdin, 4) + else: + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + changed_cells = 0 + i = 0 + + while i < len(notebook.cells): + cell = notebook.cells[i] + + if cell.cell_type != 'code': + i += 1 + continue + + if cell.source.startswith('!'): + # Shell magic -- leave unchanged + i += 1 + continue + + code = cell.source + '\n' + + # run autopep8 on it + fixed_code = autopep8.fix_code(code, options) + + code_sep = fixed_code.find('\n\n\n') + if split_cells and code_sep >= 0: + # Multiple defs/uses in one cell; split + this_code = fixed_code[:code_sep + 1].strip() + next_code = fixed_code[code_sep + 3:].strip() + + if len(this_code) > 0 and len(next_code) > 0: + next_cell = nbformat.v4.new_code_cell(next_code) + if cell.metadata: + next_cell.metadata = cell.metadata + cell.source = this_code + notebook.cells = notebook.cells[:i] + [cell] + [next_cell] + notebook.cells[i + 1:] + changed_cells += 1 + continue + + if code.strip() == fixed_code.strip(): + i += 1 + continue + + # Set it again + cell.source = fixed_code.strip() + changed_cells += 1 + i += 1 + + notebook_contents = (nbformat.writes(notebook) + '\n').encode('utf-8') + + if args.in_place: + if changed_cells > 0: + temp_notebook_path = notebook_path + "~" + with io.open(temp_notebook_path, 'wb') as f: + f.write(notebook_contents) + os.rename(temp_notebook_path, notebook_path) + print("%s: %d cell(s) changed" % (notebook_path, changed_cells)) + else: + print("%s: unchanged" % notebook_path) + else: + sys.stdout.buffer.write(notebook_contents) + +if __name__ == "__main__": + args = sys.argv[1:] + if len(args) == 0 or args[0] == "--help" or args[0] == "-h": + print("usage: nbautopep8 [--split-cells] [autopep8-options...] notebooks...") + print("Automatically formats Python code cells in notebooks") + print("to conform to the PEP 8 style guide.") + print() + print("autopep8-options include:") + + if len(args) > 0 and (args[0] == "--split-cells" or args[0] == "-s"): + split_cells = True + args = args[1:] + + args = autopep8.parse_args(args, apply_config=True) + + if args.diff: + print("Unsupported option: --diff") + sys.exit(2) + if args.recursive: + print("Unsupported option: --recursive") + sys.exit(2) + if args.line_range: + print("Unsupported option: --line-range") + sys.exit(2) + + if args.jobs > 1: + import multiprocessing + pool = multiprocessing.Pool(args.jobs) + pool.map(autopep8_notebook, + [(notebook, args) for notebook in args.files]) + else: + for notebook in args.files: + autopep8_notebook((notebook, args)) diff --git a/notebooks/shared/utils/nbautoslide.py b/notebooks/shared/utils/nbautoslide.py new file mode 100755 index 000000000..65eece72d --- /dev/null +++ b/notebooks/shared/utils/nbautoslide.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# (Re)generate slide metadata automatically +""" +usage: + +python nbautoslide.py notebooks... 
+""" + +import io, os, sys, types, re +import argparse +import nbformat + + +def prefix_code(code, prefix): + return prefix + code.replace('\n', '\n' + prefix) + +def print_utf8(s): + sys.stdout.buffer.write(s.encode('utf-8')) + +# Slide types +SLIDE = 'slide' +SUBSLIDE = 'subslide' +FRAGMENT = 'fragment' +SKIP = 'skip' +NO_SLIDE = 'none' + +CHARS_PER_LINE = 70 +LINES_PER_SLIDE = 15 + +def estimate_lines(cell): + lines = 0 + if cell.cell_type == 'code': + code = cell.source + lines = code.count('\n') + 1 + + if cell.cell_type == 'markdown': + text = cell.source + lines = int(len(text) / CHARS_PER_LINE) + text.count('\n') + 1 + + if 'outputs' in cell: + for output in cell.outputs: + if 'data' in output: + data = output.data + if 'text/plain' in data: + text_data = data['text/plain'] + lines += text_data.count('\n') + 1 + elif 'text' in output: + text_data = output.text + lines += text_data.count('\n') + 1 + else: + # Assume the worst + lines = LINES_PER_SLIDE + + # print(repr(cell.outputs)[:20] + "..." + "\t" + repr(lines)) + + # print(repr(cell.source[:20] + "...") + "\t" + repr(lines)) + + return lines + +def autoslide_notebook(notebook_path, args): + # load the notebook + if notebook_path == '-': + notebook = nbformat.read(sys.stdin, 4) + else: + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + changed_cells = 0 + prev_cell = None + lines = 0 + + for cell in notebook.cells: + slide_type = FRAGMENT # Default + + if cell.cell_type == 'markdown': + if cell.source.startswith('# ') or cell.source.startswith('## '): + # Main header + slide_type = SLIDE + elif cell.source.startswith('#') or cell.source.startswith('**'): + # Sub header + slide_type = SUBSLIDE + + elif cell.cell_type == 'code': + if cell.source.startswith('import ') or cell.source.startswith('from '): + # Generally uninteresting + slide_type = SKIP + + else: + # Unknown cell type + slide_type = SKIP + + # Check for overflows + cell_lines = estimate_lines(cell) + if slide_type == FRAGMENT: + if lines + cell_lines > LINES_PER_SLIDE: + slide_type = SUBSLIDE + lines = cell_lines + else: + lines += cell_lines + elif slide_type == SLIDE or slide_type == SUBSLIDE: + lines = cell_lines + + # if args.in_place: + # print(repr(cell.source[:20] + "...") + "\t" + slide_type + "\t" + repr(lines)) + + # Set slide type + if args.reset or 'metadata' not in cell or 'slideshow' not in cell.metadata: + if 'metadata' not in cell: + cell['metadata'] = {} + if 'slideshow' not in cell.metadata: + cell.metadata['slideshow'] = {} + if 'slide_type' not in cell.metadata.slideshow: + cell.metadata.slideshow['slide_type'] = NO_SLIDE + + old_slide_type = cell.metadata.slideshow.slide_type + + if slide_type != old_slide_type: + changed_cells += 1 + cell.metadata.slideshow.slide_type = slide_type + + # Save last cell + last_cell = cell + + + notebook_contents = (nbformat.writes(notebook) + '\n').encode('utf-8') + + if args.in_place: + if changed_cells > 0: + temp_notebook_path = notebook_path + "~" + with io.open(temp_notebook_path, 'wb') as f: + f.write(notebook_contents) + os.rename(temp_notebook_path, notebook_path) + print("%s: %d cell(s) changed" % (notebook_path, changed_cells)) + else: + print("%s: unchanged" % notebook_path) + else: + sys.stdout.buffer.write(notebook_contents) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--reset", help="reconstruct all slide metadata", action='store_true') + parser.add_argument("--in-place", help="change notebooks in place", 
action='store_true') + parser.add_argument("notebooks", nargs='*', help="notebooks to add slide info to") + args = parser.parse_args() + + for notebook in args.notebooks: + autoslide_notebook(notebook, args) \ No newline at end of file diff --git a/notebooks/shared/utils/nbdepend.py b/notebooks/shared/utils/nbdepend.py new file mode 100755 index 000000000..c12074363 --- /dev/null +++ b/notebooks/shared/utils/nbdepend.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +# Issue dependencies for given notebook(s) +""" +usage: + +python nbdepend.py A.ipynb B.ipynb C.ipynb > Makefile_deps +""" + +import io, os, types, re + +from IPython import get_ipython +from IPython.core.interactiveshell import InteractiveShell + +import nbformat +import argparse +import textwrap +import warnings + +import markdown +from bs4 import BeautifulSoup + +from graphviz import Digraph, Source + +RE_IMPORT = re.compile(r"^ *import *([a-zA-Z0-9_]+)", re.MULTILINE) +RE_FROM = re.compile(r"^ *from *([a-zA-Z0-9_]+) *import", re.MULTILINE) + + +def notebook_dependencies(notebook_name, include_minor_dependencies=True, path=None): + # notebook_path = import_notebooks.find_notebook(notebook_name, path) + notebook_path = notebook_name + + # load the notebook + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + shell = InteractiveShell.instance() + + modules = set() + for cell in notebook.cells: + if cell.cell_type == 'code': + # transform the input to executable Python + code = shell.input_transformer_manager.transform_cell(cell.source) + if not include_minor_dependencies and code.find('# minor') >= 0: + continue + for match in re.finditer(RE_IMPORT, code): + modules.add(match.group(1)) + for match in re.finditer(RE_FROM, code): + modules.add(match.group(1)) + + return modules + +def print_notebook_dependencies(notebooks): + for notebook_name in notebooks: + for module in notebook_dependencies(notebook_name): + print(module) + + +def get_title(notebook): + """Return the title from a notebook file""" + contents = get_text_contents(notebook) + match = re.search(r'^# (.*)', contents, re.MULTILINE) + if match is None: + warnings.warn(notebook + ": no title") + return notebook + + title = match.group(1).replace(r'\n', '') + # print("Title", title.encode('utf-8')) + return title + +def get_intro(notebook): + """Return the first paragraph from a notebook file""" + intro = get_text_contents(notebook).strip() + while intro.startswith('#'): + intro = intro[intro.index('\n') + 1:] + intro = intro[:intro.find('\n\n')] + return intro + +def markdown_to_text(s): + """Convert Markdown to plain text""" + html = markdown.markdown(s) + return "".join(BeautifulSoup(html, features='lxml').findAll(text=True)).strip() + +def format_title(title): + """Break title into two lines if too long""" + title = textwrap.fill(title, break_long_words=False, width=20) + title = title.replace(" of\n", "\nof ") + title = title.replace("Failure\nOrigins", "\nFailure Origins") + return title + +def get_text_contents(notebook): + with io.open(notebook, 'r', encoding='utf-8') as f: + nb = nbformat.read(f, as_version=4) + + contents = "" + for cell in nb.cells: + if cell.cell_type == 'markdown': + contents += "".join(cell.source) + "\n\n" + + # print("Contents of", notebook, ": ", repr(contents[:100])) + + return contents + +def draw_notebook_dependencies(notebooks, + format='svg', transitive_reduction=True, clusters=True, project='fuzzingbook'): + dot = Digraph(comment="Notebook dependencies") + # dot.attr(size='20,30', 
rank='max') + + if project == 'debuggingbook': + fontname = 'Raleway, Helvetica, Arial, sans-serif' + fontcolor = '#6A0DAD' + else: + fontname = 'Patua One, Helvetica, sans-serif' + fontcolor = '#B03A2E' + + node_attrs = { + 'shape': 'note', # note, plain, none + 'style': 'filled', + 'fontname': fontname, + 'fontcolor': fontcolor, + 'fillcolor': 'white' + } + cluster = None + + cluster_attrs = { + 'shape': 'plain', # note, plain, none + 'style': 'filled', + 'fontname': fontname, + 'fontcolor': 'black', + 'color': '#F0F0F0', + } + + for notebook_name in notebooks: + dirname = os.path.dirname(notebook_name) + basename = os.path.splitext(os.path.basename(notebook_name))[0] + title = get_title(notebook_name) + intro = markdown_to_text(get_intro(notebook_name)) + tooltip = f'{title} ({basename})\n\n{intro}' + + if clusters: + if title.startswith("Part"): + if cluster is not None: + cluster.attr(**cluster_attrs) + dot.subgraph(cluster) + + cluster = Digraph(name='cluster_' + basename) + cluster.node(basename, label=format_title(title), + URL='%s.ipynb' % basename, + tooltip=basename, shape='plain', fontname=fontname) + + elif cluster is not None: + cluster.node(basename) + + for module in notebook_dependencies(notebook_name, + include_minor_dependencies=False): + module_file = os.path.join(dirname, module + ".ipynb") + + if module_file in notebooks: + module_title = get_title(module_file) + module_intro = markdown_to_text(get_intro(module_file)) + module_tooltip = f'{module_title} ({module})\n\n{module_intro}' + + dot.node(basename, URL='%s.ipynb' % basename, + label=format_title(title), tooltip=tooltip, **node_attrs) + dot.node(module, URL='%s.ipynb' % module, + label=format_title(module_title), tooltip=module_tooltip, **node_attrs) + dot.edge(module, basename) + + if cluster is not None: + cluster.attr(**cluster_attrs) + dot.subgraph(cluster) + + if transitive_reduction: + dot.format = 'gv' + dot.save('depend.gv') + os.system('tred depend.gv > depend.gv~ && mv depend.gv~ depend.gv') + dot = Source.from_file('depend.gv') + os.remove('depend.gv') + + dot.format = format + dot.render('depend') + os.system('cat depend.' + format) + os.remove('depend') + os.remove('depend.' 
+ format) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--graph", action='store_true', help="Produce graph") + parser.add_argument("--graph-format", action='store', default='svg', help="Graph format (gv, pdf, svg, ...)") + parser.add_argument("--project", action='store', help="Project name") + parser.add_argument("--transitive-reduction", action='store_true', help="Use transitive reduction") + parser.add_argument("--cluster-by-parts", action='store_true', help="Cluster by parts") + parser.add_argument("notebooks", nargs='*', help="notebooks to determine dependencies from") + args = parser.parse_args() + + if args.graph: + draw_notebook_dependencies(args.notebooks, args.graph_format, args.transitive_reduction, args.cluster_by_parts, args.project) + else: + print_notebook_dependencies(args.notebooks) diff --git a/notebooks/shared/utils/nbindex.py b/notebooks/shared/utils/nbindex.py new file mode 100755 index 000000000..fa07dfcf7 --- /dev/null +++ b/notebooks/shared/utils/nbindex.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Produce index for given notebook(s) +""" +usage: + +python nbindex.py A.ipynb B.ipynb C.ipynb +""" + + +import io +import os +import sys +import types +import re +import nbformat +import string + +emph_seen = False + + +def format_code(s): + return "`" + s + "`" + + +def format_function(s): + return format_code(s + "()") + + +def format_class(s): + return format_code(s) + " class" + + +def format_method(s): + # return format_code(s + "()") + " method" + return format_function(s) + + +def format_emph(s): + global emph_seen + emph_seen = True + return "_" + s + "_" + + +def format_emph_index(s): + return s + + +def title_to_fragment(s): + return "#" + s.replace(" ", "-") + + +ITEMS = { + 'code': [ + ("function", re.compile( + r"^def +([A-Za-z0-9_]+)", re.MULTILINE), format_function), + ("class", re.compile( + r"^class +([A-Za-z0-9_]+)", re.MULTILINE), format_class), + ("method", re.compile( + r"^ +def +([A-Za-z0-9_]+)", re.MULTILINE), format_method), + ("constant", re.compile( + r"^ *([A-Z][A-Z0-9_]+) += ", re.MULTILINE), format_code) + ], + 'markdown': [ + # ("_term_", re.compile(r"[^_]_([a-zA-Z]+)_[^_]"), format_emph), + ("*term*", re.compile(r"[^*]\*([a-zA-Z][a-zA-Z0-9-_ ]*[a-zA-Z])\*[^*]"), format_emph_index), + ("[link]", re.compile(r"\[(.*)]\(https?:"), format_emph_index), + ("`function()`", re.compile(r"`([a-zA-Z0-9_]+)\(`"), format_function), + ("`constant`", re.compile(r"`([A-Z][A-Z0-9_]+)`"), format_code), + ] +} + +RE_LOCATION = re.compile(r"^#+ (.*)") + +index = {} + + +def collect_index(notebook_name): + notebook_path = notebook_name + fragment = "" + title = None + subtitle = None + + # load the notebook + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + for cell in notebook.cells: + if cell.cell_type == 'markdown': + for match in RE_LOCATION.findall(cell.source): + if title is None: + title = match + else: + subtitle = match + fragment = title_to_fragment(match) + # print(match, file=sys.stderr) + + for (tp, regex, formatter) in ITEMS.get(cell.cell_type, []): + for match in regex.findall(cell.source): + entry = formatter(match) + if entry not in index: + index[entry] = [] + + if title is None: + print(notebook_name + ": cell without title", file=sys.stderr) + continue + + link = notebook_name + fragment + listed_title = title + if subtitle is not None: + listed_title += " (" + subtitle + ")" + index[entry].append((listed_title, link)) + + +def index_key(entry): + s = 
entry.upper()
+    while len(s) > 0 and s[0] not in string.ascii_letters:
+        s = s[1:]
+    return s if len(s) > 0 else entry
+
+LETTERS_PER_SECTION = 5
+
+def index_markdown():
+    index_sections = []
+
+    # Create entries, one cell per letter
+    entries = list(index.keys())
+    entries.sort(key=index_key)
+    current_letter = None
+    s = ""
+
+    for entry in entries:
+        if not index_key(entry):
+            continue
+        entry_letter = index_key(entry)[0]
+        if entry_letter != current_letter:
+            if s != "":
+                index_sections.append(s)
+                s = ""
+
+            current_letter = entry_letter
+            s = "### " + entry_letter + "\n\n"
+
+        s += "* " + entry + " — "
+
+        occurrences = index[entry]
+        s += ", ".join(["[" + title + "](" + link +
+                        ")" for title, link in occurrences])
+        s += "\n"
+
+    if s != "":
+        index_sections.append(s)
+
+    # Insert in-between titles
+    ## A-E
+    ### A
+    ### B
+    new_index_sections = []
+    while len(index_sections) > 0:
+        sublist = index_sections[:LETTERS_PER_SECTION]
+        index_sections = index_sections[LETTERS_PER_SECTION:]
+        first_letter = sublist[0][len("### ")]
+        last_letter = sublist[-1][len("### ")]
+        # Having – here breaks fragment links
+        new_index_sections += ["## " + first_letter + " - " + last_letter] + sublist
+
+    return new_index_sections
+
+if __name__ == "__main__":
+    index = {}
+    for notebook in sys.argv[1:]:
+        collect_index(notebook)
+    index_sections = index_markdown()
+
+    title = "# Index"
+
+    if emph_seen:
+        title += """
+Please note: entries in _italics_ are listed only temporarily, as we convert terms to be indexed
+with `*term*` instead of `_term_`.
+    """
+    index_notebook = nbformat.v4.new_notebook(
+        cells=[nbformat.v4.new_markdown_cell(source=title)] +
+        [nbformat.v4.new_markdown_cell(source=cell_content) for cell_content in index_sections]
+    )
+
+    index_notebook.metadata = {
+        "kernelspec": {
+            "display_name": "Python 3",
+            "language": "python",
+            "name": "python3"
+        },
+        "language_info": {
+            "codemirror_mode": {
+                "name": "ipython",
+                "version": 3
+            },
+            "file_extension": ".py",
+            "mimetype": "text/x-python",
+            "name": "python",
+            "nbconvert_exporter": "python",
+            "pygments_lexer": "ipython3",
+            "version": "3.6.6"
+        },
+        "toc-autonumbering": False
+    }
+
+    sys.stdout.buffer.write(nbformat.writes(index_notebook).encode('utf-8'))
diff --git a/notebooks/shared/utils/nbmerge.py b/notebooks/shared/utils/nbmerge.py
new file mode 100755
index 000000000..f454f203a
--- /dev/null
+++ b/notebooks/shared/utils/nbmerge.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# Note, updated version of
+# https://github.com/ipython/ipython-in-depth/blob/master/tools/nbmerge.py
+"""
+usage:
+
+python nbmerge.py A.ipynb B.ipynb C.ipynb > merged.ipynb
+"""
+
+import io
+import os
+import sys
+
+import nbformat
+
+def merge_notebooks(filenames):
+    merged = None
+    for fname in filenames:
+        with io.open(fname, 'r', encoding='utf-8') as f:
+            nb = nbformat.read(f, as_version=4)
+        if merged is None:
+            merged = nb
+        else:
+            # TODO: add an optional marker between joined notebooks,
+            # like a horizontal rule, for example, or some other arbitrary
+            # (user-specified) markdown cell
+            merged.cells.extend(nb.cells)
+    if not hasattr(merged.metadata, 'name'):
+        merged.metadata.name = ''
+    merged.metadata.name += "_merged"
+    sys.stdout.buffer.write(nbformat.writes(merged).encode('utf-8'))
+
+if __name__ == '__main__':
+    notebooks = sys.argv[1:]
+    if not notebooks:
+        print(__doc__, file=sys.stderr)
+        sys.exit(1)
+
+    merge_notebooks(notebooks)
\ No newline at end of file
diff --git a/notebooks/shared/utils/nbshorten.py b/notebooks/shared/utils/nbshorten.py
new file mode 100755
index 000000000..b88481db2
--- /dev/null
+++ b/notebooks/shared/utils/nbshorten.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# Remove excursions from notebooks
+"""
+usage:
+
+python nbshorten.py notebooks...
+"""
+
+import io, os, sys, types, re
+import argparse
+import nbformat
+
+def prefix_code(code, prefix):
+    return prefix + code.replace('\n', '\n' + prefix)
+
+def print_utf8(s):
+    sys.stdout.buffer.write(s.encode('utf-8'))
+
+def title_to_anchor(title):
+    return title.replace(' ', '-').replace('`', '')
+
+def link(site_prefix, notebook_path, title):
+    notebook_basename = os.path.splitext(os.path.basename(notebook_path))[0]
+    anchor = "#Excursion:-" + title_to_anchor(title)
+    return site_prefix + notebook_basename + ".html" + anchor
+
+RE_NOTEBOOK_TITLE = re.compile(r'#\s\s*(?P<title>[^\n]*).*', re.DOTALL)
+RE_BEGIN_EXCURSION = re.compile(r'##*\s\s*Excursion:\s*\s(?P<title>.*)')
+RE_END_EXCURSION = re.compile(r'##*\s\s*[eE]nd.*[eE]xcursion')
+
+def shorten_notebook(notebook_path, args):
+    # load the notebook
+    if notebook_path == '-':
+        notebook = nbformat.read(sys.stdin, 4)
+    else:
+        with io.open(notebook_path, 'r', encoding='utf-8') as f:
+            notebook = nbformat.read(f, 4)
+
+    in_excursion = False
+    skipped_cells = 0
+    new_cells = []
+    notebook_title = None
+
+    for cell in notebook.cells:
+        skip_this_cell = in_excursion
+
+        if notebook_title is None:
+            match_notebook_title = RE_NOTEBOOK_TITLE.match(cell.source)
+            if match_notebook_title:
+                notebook_title = match_notebook_title.group('title')
+
+        if cell.cell_type == 'markdown':
+            match_begin_excursion = RE_BEGIN_EXCURSION.match(cell.source)
+            match_end_excursion = RE_END_EXCURSION.match(cell.source)
+
+            if match_begin_excursion:
+                skip_this_cell = True
+                in_excursion = True
+
+                if args.link_to:
+                    # Add a link to online version
+                    title = match_begin_excursion.group('title')
+                    cell.source = f'({title} can be found in ["{notebook_title}" online]({link(args.link_to, notebook_path, title)}).)'
+                    skip_this_cell = False
+
+            elif match_end_excursion:
+                skip_this_cell = True
+                in_excursion = False
+
+        if skip_this_cell:
+            skipped_cells += 1
+
+            if args.skip_slides:
+                # Don't include in slides
+                if 'metadata' not in cell:
+                    cell['metadata'] = {}
+                if 'slideshow' not in cell.metadata:
+                    cell.metadata['slideshow'] = {}
+                if 'slide_type' not in cell.metadata.slideshow:
+                    cell.metadata.slideshow['slide_type'] = 'skip'
+
+        else:
+            new_cells.append(cell)
+
+    notebook.cells = new_cells
+    notebook_contents = (nbformat.writes(notebook) + '\n').encode('utf-8')
+
+    if args.in_place:
+        if skipped_cells > 0:
+            temp_notebook_path = notebook_path + "~"
+            with io.open(temp_notebook_path, 'wb') as f:
+                f.write(notebook_contents)
+            os.rename(temp_notebook_path, notebook_path)
+            print("%s: %d cell(s) skipped" % (notebook_path, skipped_cells))
+        else:
+            print("%s: unchanged" % notebook_path)
+    else:
+        sys.stdout.buffer.write(notebook_contents)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--link-to", help="link to online version on given site")
+    parser.add_argument("--skip-slides", help="skip excursion cells in slides", action='store_true')
+    parser.add_argument("--in-place", help="change notebooks in place", action='store_true')
+    parser.add_argument("notebooks", nargs='*', help="notebooks to shorten")
+    args = parser.parse_args()
+
+    for notebook in args.notebooks:
+        shorten_notebook(notebook, args)
\ No newline at end of file
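A quick sanity check of the excursion markers that nbshorten.py recognizes (a hypothetical snippet; the two regexes are copied verbatim from the file above, the sample headings are made up):

    import re

    RE_BEGIN_EXCURSION = re.compile(r'##*\s\s*Excursion:\s*\s(?P<title>.*)')
    RE_END_EXCURSION = re.compile(r'##*\s\s*[eE]nd.*[eE]xcursion')

    m = RE_BEGIN_EXCURSION.match('## Excursion: Grammar Internals')
    assert m is not None and m.group('title') == 'Grammar Internals'
    assert RE_END_EXCURSION.match('## End of Excursion') is not None

diff --git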
a/notebooks/shared/utils/nbspellcheck.py b/notebooks/shared/utils/nbspellcheck.py new file mode 100755 index 000000000..5ba6d7846 --- /dev/null +++ b/notebooks/shared/utils/nbspellcheck.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# Run spellchecker on notebook +""" +usage: + +python nbspellcheck.py notebooks... +""" + +import io, os, sys, types, re +import string +import nbformat + +# from https://github.com/barrust/pyspellchecker - `pip install pyspellchecker` +from spellchecker import SpellChecker + +KNOWN_WORDS = [ + 'microsoft', 'google', 'fuzzer', 'fuzzed', 'fuzzing', 'sanitizer', 'openssl', 'heartbleed', + 'xkcd', 'codenomicon', 'redblack', 'mypy', 'newline', 'nonprintable', 'llvm', 'cryptographic', + "you'll", "we'd", "here's", "memory-checking", 'fuzzers', 'placeholder', 'uninitialized', + 'cannot', 'sqrt', 'url', 'urls', 'iterable', "that's", "won't", "search-based", "mutation-based", + "non-executable", "you're", "isn't", 'lowercase', "grammar-based", "blog", "wikipedia", + "comma-separated", "turing-complete", "nonterminal", 'backus-naur', 'json', 'whitespace', + 'bnf', 'ebnf', 'nonterminals', 'string-based', 'tree-based', 'grammar-generated', + 'infty', 'algorithmically', 'subtree', 'visualizes', 'mutates', 'cgi-encoded', + 'white-box', 'black-box', 'initialization', 'non-implemented', 'jupyter', 'javascript', + 'firefox', 'debug', 'shellsort', 'quintillions', "we'll", 'zeller', 'rahul', 'gopinath', + 'iterates', 'parenthesized', 'metadata', 'html', 'github', 'makefile', "hasn't", + 'comprehensions', 'subclassing', 'subclassed', 'inline', 'markdown', 'bulleted', + 'cheatsheet', 'timeout', 'timeouts' +] + +spell = SpellChecker() +spell.word_frequency.load_words(KNOWN_WORDS) + +def print_utf8(s): + sys.stdout.buffer.write(s.encode('utf-8')) + +def normalize(word): + # print(repr(word)) + word = word.lower() + word = "".join([c for c in word if c in string.ascii_letters + "'-" ]) + return word + +def get_words(text): + words = text.split() + ws = [] + for word in words: + w = normalize(word) + if w == '' or len(w) > 20: + continue + ws.append(w) + return ws + +RE_STUFF = re.compile(r'\([htf]*tp[^)]*\)|\([^)]*.[^).]+\)|`[^`]*`') + +def strip_stuff(text): + return re.sub(RE_STUFF, '', text) + +def spellcheck_notebook(notebook_path): + # load the notebook + if notebook_path == '-': + notebook = nbformat.read(sys.stdin, 4) + else: + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + for cell in notebook.cells: + if cell.cell_type != 'markdown': + continue + + text = strip_stuff(cell.source) + words = get_words(text) + misspelled = spell.unknown(words) + if len(misspelled) > 0: + # print(cell.source) + for word in misspelled: + correction = spell.correction(word) + if word == correction: + print("%s: unknown word %s" % (notebook_path, repr(word))) + else: + print("%s: unknown word %s (did you mean %s?)" % + (notebook_path, repr(word), repr(correction))) + +if __name__ == "__main__": + for notebook in sys.argv[1:]: + spellcheck_notebook(notebook) diff --git a/notebooks/shared/utils/nbstats.py b/notebooks/shared/utils/nbstats.py new file mode 100755 index 000000000..10ac63c41 --- /dev/null +++ b/notebooks/shared/utils/nbstats.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Print statistics for given notebook(s) +""" +usage: + +python nbstats.py A.ipynb B.ipynb C.ipynb +""" + +import io, os, sys, types, re + +import nbformat + +def notebook_stats(notebook_name, path=None): + # notebook_path = import_notebooks.find_notebook(notebook_name, path) + 
notebook_path = notebook_name + + # load the notebook + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + notebook_loc = 0 + notebook_words = 0 + + for cell in notebook.cells: + if cell.cell_type == 'code': + cell_loc = cell.source.replace('\n\n', '\n').strip().count('\n') + 1 + # print(cell.source.encode('utf8'), cell_loc) + notebook_loc += cell_loc + else: + cell_words = len(cell.source.split()) + # print(cell.source.encode('utf8'), cell_words) + notebook_words += cell_words + + return notebook_loc, notebook_words + +FORMAT = "%35s%6d LOC%7d words" + +if __name__ == "__main__": + total_loc = 0 + total_words = 0 + for notebook in sys.argv[1:]: + notebook_loc, notebook_words = notebook_stats(notebook) + print(FORMAT % (notebook, notebook_loc, notebook_words)) + total_loc += notebook_loc + total_words += notebook_words + + if len(sys.argv) > 2: + print(FORMAT % ("Total", total_loc, total_words)) + \ No newline at end of file diff --git a/notebooks/shared/utils/nbsummarize.py b/notebooks/shared/utils/nbsummarize.py new file mode 100755 index 000000000..622eda6f2 --- /dev/null +++ b/notebooks/shared/utils/nbsummarize.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# Update synopsis and summaries for given notebook(s) +""" +usage: + +python nbsummarize.py notebook.ipynb +""" + +import io, os, sys, types, re + +from IPython import get_ipython +from IPython.core.interactiveshell import InteractiveShell + +import nbformat +import argparse +import base64 + +SYNOPSIS_TITLE = "## Synopsis" +SUMMARY_TITLE = "### Summary" + +img_count = 1 + +def cell_to_text(cell, notebook_path, notebook_basename): + """Convert a cell (and its output) into a single Markdown text.""" + if cell.cell_type != 'code': + return cell.source + "\n\n" + + # Code cell + synopsis = "```python\n>>> " + cell.source.replace('\n', '\n>>> ') + "\n```\n" + output_text = '' + + for output in cell.outputs: + text = None + + # SVG output + if text is None: + svg = None + try: + svg = output.data['image/svg+xml'] + except KeyError: + pass + except AttributeError: + pass + if svg is not None: + global img_count + + svg_basename = (notebook_basename + + '-synopsis-' + repr(img_count) + '.svg') + png_basename = (notebook_basename + + '-synopsis-' + repr(img_count) + '.png') + img_count += 1 + + svg_filename = os.path.join( + os.path.dirname(notebook_path), + 'PICS', svg_basename) + png_filename = os.path.join( + os.path.dirname(notebook_path), + 'PICS', png_basename) + + print("Creating", svg_filename) + with open(svg_filename, "w") as f: + f.write(svg) + + print("Creating", png_filename) + os.system('convert -density 300 ' + svg_filename + ' ' + png_filename) + + if 'RENDER_HTML' in os.environ: + # Render all HTML and SVG into PNG + text = "![](" + 'PICS/' + png_basename + ')' + else: + text = "![](" + 'PICS/' + svg_basename + ')' + + # PNG output + if text is None: + png = None + try: + png = output.data['image/png'] + except KeyError: + pass + except AttributeError: + pass + if png is not None: + png_basename = (notebook_basename + + '-synopsis-' + repr(img_count) + '.png') + img_count += 1 + + png_filename = os.path.join( + os.path.dirname(notebook_path), + 'PICS', png_basename) + + print("Creating", png_filename) + with open(png_filename, "wb") as f: + f.write(base64.b64decode(png, validate=True)) + text = "![](" + 'PICS/' + png_basename + ')' + + # Text output + if text is None: + try: + text = output.text + except AttributeError: + pass + + # Data output + if text is None: + try: + text = 
output.data['text/plain'] + except KeyError: + pass + + if text is not None: + output_text += text + '\n' + + if output_text: + if output_text.startswith('![]'): + synopsis += '\n' + output_text + '\n' + else: + synopsis += "```python\n" + output_text + "```\n" + + return synopsis + + +def notebook_synopsis(notebook_name): + notebook_path = notebook_name + + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + synopsis = "" + in_synopsis = False + first_synopsis = True + img_count = 1 + + notebook_noext = os.path.splitext(notebook_path)[0] + notebook_basename = os.path.basename(notebook_noext) + + for cell in notebook.cells: + if not first_synopsis and cell.source.startswith(SYNOPSIS_TITLE): + in_synopsis = True + synopsis = SYNOPSIS_TITLE + f""" +<!-- Automatically generated. Do not edit. --> + +To [use the code provided in this chapter](Importing.ipynb), write + +```python +>>> from {args.project}.{notebook_basename} import <identifier> +``` + +and then make use of the following features. +""" + synopsis += cell.source[len(SYNOPSIS_TITLE):] + "\n\n" + continue + elif cell.source.startswith("## "): + in_synopsis = False + first_synopsis = False + + if in_synopsis: + synopsis += cell_to_text(cell, notebook_path, notebook_basename) + + synopsis = synopsis.replace("```\n```python\n", "") + + return synopsis + + +def update_synopsis(notebook_name, synopsis): + notebook_path = notebook_name + global img_count + img_count = 1 + + # Read notebook + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + for i, cell in enumerate(notebook.cells): + if cell.source.startswith("## Synopsis"): + # Update cell + if cell.source == synopsis: + return + cell.source = synopsis + break + elif cell.source.startswith("## "): + # Insert cell before + new_cell = nbformat.v4.new_markdown_cell(source=synopsis) + notebook.cells = (notebook.cells[:i] + + [new_cell] + notebook.cells[i:]) + break + + # print(nbformat.writes(notebook)) + + # Write notebook out again + with io.open(notebook_path, 'w', encoding='utf-8') as f: + f.write(nbformat.writes(notebook)) + + print("Updated " + notebook_path) + + +# TODO: Have the "summarize" function return a target spec (section titles) for each summary +# The summary would then be inserted at the beginning of each section + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--project", help="project name", default="fuzzingbook") + parser.add_argument("--update", action='store_true', + help="Update summaries") + parser.add_argument("notebooks", nargs='*', help="notebooks to extract/update synopsis for") + args = parser.parse_args() + + for notebook in args.notebooks: + synopsis = notebook_synopsis(notebook) + if not synopsis: + continue + + if args.update: + update_synopsis(notebook, synopsis) + else: + print(synopsis, end='') diff --git a/notebooks/shared/utils/nbsynopsis.py b/notebooks/shared/utils/nbsynopsis.py new file mode 100755 index 000000000..1f936d8c4 --- /dev/null +++ b/notebooks/shared/utils/nbsynopsis.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python +# Update synopsis for given notebook(s) +""" +usage: + +python nbsynopsis.py notebook.ipynb +""" + +import io, os, sys, types, re + +from IPython import get_ipython +from IPython.core.interactiveshell import InteractiveShell + +import nbformat +import argparse +import base64 +import shutil + +SYNOPSIS_TITLE = "## Synopsis" + +RXTERM = re.compile('\x1b' + r'\[[^a-zA-Z]*[a-zA-Z]') +def unterm(text): + """Remove 
terminal escape commands such as <ESC>[34m""" + return RXTERM.sub('', text) + +def convert(svg_filename, png_filename): + """Convert `svg_filename` into `png_filename`.""" + + if os.path.exists('/Applications/Inkscape.app/'): + # Inkscape on a Mac + os.system(f"/Applications/Inkscape.app/Contents/MacOS/inkscape -d 300 '{svg_filename}' --export-filename '{png_filename}'") + + elif shutil.which('inkscape'): + # Inkscape on Linux + os.system(f"inkscape -d 300 '{svg_filename}' --export-filename '{png_filename}'") + + elif shutil.which('convert'): + # ImageMagick anywhere + os.system(f"convert -density 300 '{svg_filename}' '{png_filename}'") + + else: + raise ValueError("Please install Inkscape (preferred) or ImageMagick") + + +def notebook_synopsis(notebook_name): + notebook_path = notebook_name + + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + synopsis = "" + in_synopsis = False + first_synopsis = True + img_count = 1 + + notebook_noext = os.path.splitext(notebook_path)[0] + notebook_basename = os.path.basename(notebook_noext) + + for cell in notebook.cells: + if not first_synopsis and cell.source.startswith(SYNOPSIS_TITLE): + in_synopsis = True + synopsis = SYNOPSIS_TITLE + f""" +<!-- Automatically generated. Do not edit. --> + +To [use the code provided in this chapter](Importing.ipynb), write + +```python +>>> from {args.project}.{notebook_basename} import <identifier> +``` + +and then make use of the following features. +""" + synopsis += cell.source[len(SYNOPSIS_TITLE):] + "\n\n" + continue + elif cell.source.startswith("## "): + in_synopsis = False + first_synopsis = False + + if in_synopsis: + if cell.cell_type == 'code': + if cell.source.startswith("# ignore"): + pass + else: + synopsis += "```python\n>>> " + cell.source.replace('\n', '\n>>> ') + "\n```\n" + output_text = '' + for output in cell.outputs: + text = None + + # SVG output + if text is None: + svg = None + try: + svg = output.data['image/svg+xml'] + except KeyError: + pass + except AttributeError: + pass + if svg is not None: + svg_basename = (notebook_basename + + '-synopsis-' + repr(img_count) + '.svg') + png_basename = (notebook_basename + + '-synopsis-' + repr(img_count) + '.png') + img_count += 1 + + svg_filename = os.path.join( + os.path.dirname(notebook_path), + 'PICS', svg_basename) + png_filename = os.path.join( + os.path.dirname(notebook_path), + 'PICS', png_basename) + + print("Creating", svg_filename) + with open(svg_filename, "w") as f: + f.write(svg) + print("Creating", png_filename) + + convert(svg_filename, png_filename) + + if 'RENDER_HTML' in os.environ: + # Render all HTML and SVG into PNG + pics_name = png_basename + else: + pics_name = svg_basename + + text = ("```\n" + + '![](' + 'PICS/' + pics_name + ')\n' + + '```\n') + + # PNG output + if text is None: + png = None + try: + png = output.data['image/png'] + except KeyError: + pass + except AttributeError: + pass + if png is not None: + png_basename = (notebook_basename + + '-synopsis-' + repr(img_count) + '.png') + img_count += 1 + + png_filename = os.path.join( + os.path.dirname(notebook_path), + 'PICS', png_basename) + + print("Creating", png_filename) + with open(png_filename, "wb") as f: + f.write(base64.b64decode(png, validate=True)) + text = "```\n![](" + 'PICS/' + png_basename + ')\n```\n' + + # Markdown output + if text is None: + try: + text = "```\n" + output.data['text/markdown'] + "\n```\n" + except KeyError: + pass + except AttributeError: + pass + + # HTML output + if text is None: + try: 
+ text = "```\n" + output.data['text/html'] + "\n```\n" + except KeyError: + pass + except AttributeError: + pass + + # Text output + if text is None: + try: + text = unterm(output.text) + except AttributeError: + pass + + # Data output + if text is None: + try: + text = unterm(output.data['text/plain'] + '\n') + except KeyError: + pass + + if text is not None: + output_text += text + + if output_text: + if output_text.startswith('![]'): + synopsis += '\n' + output_text + '\n' + else: + synopsis += "```python\n" + output_text + "```\n" + else: + synopsis += cell.source + "\n\n" + + synopsis = synopsis.replace("```python\n```\n", "\n") + synopsis = synopsis.replace("```\n```python\n", "\n") + synopsis = synopsis.replace("```\n```\n", "\n") + + return synopsis + +def skip_cell(cell): + # Don't include in slides + if 'metadata' not in cell: + cell['metadata'] = {} + if 'slideshow' not in cell.metadata: + cell.metadata['slideshow'] = {} + if 'slide_type' not in cell.metadata.slideshow: + cell.metadata.slideshow['slide_type'] = 'skip' + return cell + +def update_synopsis(notebook_name, synopsis): + notebook_path = notebook_name + + # Read notebook + with io.open(notebook_path, 'r', encoding='utf-8') as f: + notebook = nbformat.read(f, 4) + + for i, cell in enumerate(notebook.cells): + if cell.source.startswith("## Synopsis"): + # Update cell + if cell.source == synopsis: + return + cell.source = synopsis + cell = skip_cell(cell) + break + elif cell.source.startswith("## "): + # Insert cell before + new_cell = nbformat.v4.new_markdown_cell(source=synopsis) + new_cell = skip_cell(new_cell) + notebook.cells = (notebook.cells[:i] + + [new_cell] + notebook.cells[i:]) + break + + # print(nbformat.writes(notebook)) + + # Write notebook out again + with io.open(notebook_path, 'w', encoding='utf-8') as f: + f.write(nbformat.writes(notebook)) + + print("Updated " + notebook_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--project", help="project name", default="fuzzingbook") + parser.add_argument("--update", action='store_true', + help="Update synopsis section") + parser.add_argument("notebooks", nargs='*', help="notebooks to extract/update synopsis for") + args = parser.parse_args() + + for notebook in args.notebooks: + synopsis = notebook_synopsis(notebook) + if not synopsis: + continue + + if args.update: + update_synopsis(notebook, synopsis) + else: + print(synopsis, end='') diff --git a/notebooks/shared/utils/nbtoc.py b/notebooks/shared/utils/nbtoc.py new file mode 100755 index 000000000..a6af34e41 --- /dev/null +++ b/notebooks/shared/utils/nbtoc.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# Create table of contents for given notebook(s) +""" +usage: + +python nbtoc.py A.ipynb B.ipynb C.ipynb +""" + +import io, os, sys, types, re + +import nbformat +import argparse + +import markdown +from bs4 import BeautifulSoup +import html + + +def get_text_contents(notebook): + with io.open(notebook, 'r', encoding='utf-8') as f: + nb = nbformat.read(f, as_version=4) + + contents = "" + for cell in nb.cells: + if cell.cell_type == 'markdown': + contents += "".join(cell.source) + "\n\n" + + # print("Contents of", notebook, ": ", repr(contents[:100])) + + return contents + +def get_title(notebook): + """Return the title from a notebook file""" + contents = get_text_contents(notebook) + match = re.search(r'^# (.*)', contents, re.MULTILINE) + title = match.group(1).replace(r'\n', '') + if title.startswith('['): + title = title[1:title.find(']')] + + # print("Title", 
title.encode('utf-8')) + return title + +def get_intro(notebook): + """Return the first paragraph from a notebook file""" + intro = get_text_contents(notebook).strip() + while intro.startswith('#'): + intro = intro[intro.index('\n') + 1:] + intro = intro[:intro.find('\n\n')] + return intro + +def markdown_to_text(s): + """Convert Markdown to plain text""" + html = markdown.markdown(s) + return "".join(BeautifulSoup(html, features='lxml').findAll(text=True)).strip() + +def text_to_tooltip(s): + """Convert plain text to tooltip""" + return html.escape(s).replace('\n', ' ') + +def notebook_toc_entry(notebook_name, prefix, path=None, tooltips=True): + # notebook_path = import_notebooks.find_notebook(notebook_name, path) + notebook_path = notebook_name + notebook_title = get_title(notebook_path) + notebook_basename = os.path.basename(notebook_name) + notebook_base = os.path.splitext(notebook_basename)[0] + notebook_intro = markdown_to_text(get_intro(notebook_path)) + notebook_tooltip = text_to_tooltip(f'{notebook_title} ({notebook_base})\n\n{notebook_intro}') + + if tooltips: + return f'{prefix} <a href="{notebook_basename}" title="{notebook_tooltip}">{notebook_title}</a>\n' + else: + return f'{prefix} [{notebook_title}]({notebook_basename})' + +def notebook_toc(public_chapters, appendices, booktitle): + if booktitle: + booktitle = "# " + booktitle + else: + booktitle = "" + + chapter_toc = "## [Table of Contents](index.ipynb)\n\n" + counter = 1 + for notebook in public_chapters + appendices: + notebook_title = get_title(notebook) + if (notebook_title.startswith("Part ") or + notebook_title.startswith("Appendices")): + # chapter_toc += "\n### " + notebook_title + "\n\n" + chapter_toc += "\n" + notebook_toc_entry(notebook, "###") + "\n" + else: + chapter_toc += notebook_toc_entry(notebook, "*") # repr(counter) + ".") + counter += 1 + + # appendix_toc = "### [Appendices](99_Appendices.ipynb)\n\n" + # for notebook in appendices: + # appendix_toc += notebook_toc_entry(notebook, "*") + + sitemap = r"""## Sitemap +While the chapters of this book can be read one after the other, there are many possible paths through the book. In this graph, an arrow $A \rightarrow B$ means that chapter $A$ is a prerequisite for chapter $B$. 
You can pick arbitrary paths in this graph to get to the topics that interest you most: +""" + + sitemap_code_1 = "# ignore\nfrom IPython.display import SVG" + sitemap_code_2 = "# ignore\nSVG(filename='PICS/Sitemap.svg')" + + toc_notebook = nbformat.v4.new_notebook( + cells=[ + nbformat.v4.new_markdown_cell(source=booktitle), + nbformat.v4.new_markdown_cell(source=sitemap), + nbformat.v4.new_code_cell(source=sitemap_code_1), + nbformat.v4.new_code_cell(source=sitemap_code_2), + nbformat.v4.new_markdown_cell(source=chapter_toc) + # nbformat.v4.new_markdown_cell(source=appendix_toc), + ]) + + # Get along with TOC extension + toc_notebook.metadata['toc'] = { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": False, + "sideBar": False, + "skip_h1_title": False, + "title_cell": "", + "title_sidebar": "Contents", + "toc_cell": False, + "toc_position": {}, + "toc_section_display": False, + "toc_window_display": False + } + + # Add general metadata + toc_notebook.metadata["kernelspec"] = { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + + toc_notebook.metadata["language_info"] = { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + + return toc_notebook + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--chapters", help="List of public chapters") + parser.add_argument("--appendices", help="List of appendices") + parser.add_argument("--title", help="Book title", default="") + args = parser.parse_args() + + public_chapters = args.chapters.split() + appendices = args.appendices.split() + + toc_notebook = notebook_toc(public_chapters, appendices, args.title) + sys.stdout.buffer.write(nbformat.writes(toc_notebook).encode("utf-8")) diff --git a/notebooks/shared/utils/post_html.py b/notebooks/shared/utils/post_html.py new file mode 100755 index 000000000..d936b29d1 --- /dev/null +++ b/notebooks/shared/utils/post_html.py @@ -0,0 +1,942 @@ +#!/usr/bin/env python3 +# Expand elements in generated HTML +# Usage: post-html.py CHAPTER_NAME CHAPTER_1 CHAPTER_2 ... + +# Note: I suppose this could also be done using Jinja2 templates and ipypublish, +# but this thing here works pretty well. +# If you'd like to convert this into some more elegant framework, +# implement it and send me a pull request -- AZ + +import argparse +import os.path +import time +import datetime +import re +import sys +import io +import html +import urllib + +try: + import nbformat + have_nbformat = True +except: + have_nbformat = False + +# Process arguments +parser = argparse.ArgumentParser() +parser.add_argument("--home", help="omit links to notebook, code, and slides", action='store_true') +parser.add_argument("--include-ready", help="include ready chapters", action='store_true') +parser.add_argument("--include-todo", help="include work-in-progress chapters", action='store_true') +parser.add_argument("--project", help="project name", default="fuzzingbook") +parser.add_argument("--title", help="book title", default="The Fuzzing Book") +parser.add_argument("--authors", help="list of authors", default="A. 
Zeller et al.") +parser.add_argument("--twitter", help="twitter handle", default="@FuzzingBook") +parser.add_argument("--menu-prefix", help="prefix to html files in menu") +parser.add_argument("--all-chapters", help="List of all chapters") +parser.add_argument("--public-chapters", help="List of public chapters") +parser.add_argument("--ready-chapters", help="List of ready chapters") +parser.add_argument("--todo-chapters", help="List of work-in-progress chapters") +parser.add_argument("--new-chapters", help="List of new chapters") +parser.add_argument("chapter", nargs=1) +args = parser.parse_args() + +# Some fixed strings +project = args.project +booktitle = args.title +authors = args.authors +twitter = args.twitter + +site_html = f"https://www.{project}.org/" +github_html = f"https://github.com/uds-se/{project}/" +notebook_html = f"https://mybinder.org/v2/gh/uds-se/{project}/master?filepath=docs/" + +# Menus +# For icons, see https://fontawesome.com/cheatsheet +menu_start = r""" +<nav> +<div id="cssmenu"> + <ul> + <li class="has-sub"><a href="#"><span title="__BOOKTITLE__"><i class="fa fa-fw fa-bars"></i> </span><span class="menu_1">__BOOKTITLE_BETA__</span></a> + <ol> + <__STRUCTURED_ALL_CHAPTERS_MENU__> + <li><a href="__SITE_HTML__html/00_Index.html">Index (beta)</a></i></li> + </ol> + </li> + <li class="has-sub"><a href="#"><span title="__CHAPTER_TITLE__"><i class="fa fa-fw fa-list-ul"></i></span> <span class="menu_2">__CHAPTER_TITLE_BETA__</span></a> + <__ALL_SECTIONS_MENU__> + </li> + """ + +menu_end = r""" + <li class="has-sub"><a href="#"><span title="Share"><i class="fa fa-fw fa-comments"></i> </span> <span class="menu_4">Share</span></a> + <ul> + <li><a href="__SHARE_TWITTER__" target="popup" __TWITTER_ONCLICK__><i class="fa fa-fw fa-twitter"></i> Share on Twitter</a> + <li><a href="__SHARE_FACEBOOK__" target="popup" __FACEBOOK_ONCLICK__><i class="fa fa-fw fa-facebook"></i> Share on Facebook</a> + <li><a href="__SHARE_MAIL__"><i class="fa fa-fw fa-envelope"></i> Share by Email</a> + <li><a href="#citation" id="cite" onclick="revealCitation()"><i class="fa fa-fw fa-mortar-board"></i> Cite</a> + </ul> + </li> + <li class="has-sub"><a href="#"><span title="Help"><i class="fa fa-fw fa-question-circle"></i></span> <span class="menu_5">Help</span></a> + <ul> + <li><a href="__SITE_HTML__#Troubleshooting"><i class="fa fa-fw fa-wrench"></i> Troubleshooting</a></li> + <li><a href="https://docs.python.org/3/tutorial/" target=_blank><i class="fa fa-fw fa-question-circle"></i> Python Tutorial</a> + <li><a href="https://www.dataquest.io/blog/jupyter-notebook-tutorial/" target=_blank><i class="fa fa-fw fa-question-circle"></i> Jupyter Notebook Tutorial</a> + <li><a href="__GITHUB_HTML__issues/" target="_blank"><i class="fa fa-fw fa-commenting"></i> Report an Issue</a></li> + </ul> + </li> + </ul> +</div> +</nav> +""" + +site_header_template = menu_start + r""" + <li class="has-sub"><a href="#"><span title="Resources"><i class="fa fa-fw fa-cube"></i> </span><span class="menu_3">Resources</span></a> + <ul> + <li><a href="__CHAPTER_NOTEBOOK_IPYNB__" target="_blank" class="edit_as_notebook"><i class="fa fa-fw fa-edit"></i> Edit Notebooks</a></li> + <li><a href="__SITE_HTML__dist/__PROJECT__-code.zip"><i class="fa fa-fw fa-cube"></i> All Code (.zip)</a></li> + <li><a href="__SITE_HTML__dist/__PROJECT__-notebooks.zip"><i class="fa fa-fw fa-cube"></i> All Notebooks (.zip)</a></li> + <li><a href="__GITHUB_HTML__" target="_blank"><i class="fa fa-fw fa-github"></i> Project Page</a></li> + <li><a 
href="html/ReleaseNotes.html" target="_blank"><i class="fa fa-fw fa-calendar"></i> Release Notes</a></li> + </ul> + </li> +""" + menu_end + +# Chapters +chapter_header_template = menu_start + r""" + <li class="has-sub"><a href="#"><span title="Resources"><i class="fa fa-fw fa-cube"></i> </span><span class="menu_3">Resources</span></a> + <ul> + <li><a href="__CHAPTER_NOTEBOOK_IPYNB__" target="_blank" class="edit_as_notebook"><i class="fa fa-fw fa-edit"></i> Edit as Notebook</a></li> + <li><a href="__SITE_HTML__slides/__CHAPTER__.slides.html" target="_blank"><i class="fa fa-fw fa-video-camera"></i> View Slides</a></li> + <li><a href="__SITE_HTML__code/__CHAPTER__.py"><i class="fa fa-fw fa-download"></i> Download Code (.py)</a></li> + <li><a href="__SITE_HTML__notebooks/__CHAPTER__.ipynb"><i class="fa fa-fw fa-download"></i> Download Notebook (.ipynb)</a></li> + <li><a href="__SITE_HTML__dist/__PROJECT__-code.zip"><i class="fa fa-fw fa-cube"></i> All Code (.zip)</a></li> + <li><a href="__SITE_HTML__dist/__PROJECT__-notebooks.zip"><i class="fa fa-fw fa-cube"></i> All Notebooks (.zip)</a></li> + <li><a href="__GITHUB_HTML__" target="_blank"><i class="fa fa-fw fa-github"></i> Project Page</a></li> + <li><a href="ReleaseNotes.html" target="_blank"><i class="fa fa-fw fa-calendar"></i> Release Notes</a></li> + </ul> + </li> + """ + menu_end + + +# Footers +site_citation_template = r""" +<div id="citation" class="citation" style="display: none;"> +<a name="citation"></a> +<h2>How to Cite this Work</h2> +<p> +__AUTHORS__: "<a href="__SITE_HTML__">__BOOKTITLE__</a>". Retrieved __DATE__. +</p> +<pre> +@book{__BIBTEX_KEY__, + author = {__AUTHORS_BIBTEX__}, + title = {__BOOKTITLE__}, + year = {__YEAR__}, + publisher = {CISPA Helmholtz Center for Information Security}, + howpublished = {\url{__SITE_HTML__}}, + note = {Retrieved __DATE__}, + url = {__SITE_HTML__}, + urldate = {__DATE__} +} +</pre> +</div> +""" + +chapter_citation_template = r""" +<div id="citation" class="citation" style="display: none;"> +<a name="citation"></a> +<h2>How to Cite this Work</h2> +<p> +__AUTHORS__: "<a href="__CHAPTER_HTML__">__CHAPTER_TITLE__</a>". In __AUTHORS__, "<a href="__SITE_HTML__">__BOOKTITLE__</a>", <a href="__CHAPTER_HTML__">__CHAPTER_HTML__</a>. Retrieved __DATE__. +</p> +<pre> +@incollection{__BIBTEX_KEY__:__CHAPTER__, + author = {__AUTHORS_BIBTEX__}, + booktitle = {__BOOKTITLE__}, + title = {__CHAPTER_TITLE__}, + year = {__YEAR__}, + publisher = {CISPA Helmholtz Center for Information Security}, + howpublished = {\url{__CHAPTER_HTML__}}, + note = {Retrieved __DATE__}, + url = {__CHAPTER_HTML__}, + urldate = {__DATE__} +} +</pre> +</div> +""" + +common_footer_template = r""" +<p class="imprint"> +<img style="float:right" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="Creative Commons License"> +The content of this project is licensed under the +<a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" target=_blank>Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>. +The source code that is part of the content, as well as the source code used to format and display that content is licensed under the <a href="https://github.com/uds-se/__PROJECT__/blob/master/LICENSE.md#mit-license">MIT License</a>. 
+<a href="__GITHUB_HTML__commits/master/notebooks/__CHAPTER__.ipynb" target=_blank)>Last change: __DATE__</a> • +<a href="#citation" id="cite" onclick="revealCitation()">Cite</a> • +<a href="https://cispa.de/en/impressum" target=_blank>Imprint</a> +</p> + +<script> +function revealCitation() { + var c = document.getElementById("citation"); + c.style.display = "block"; +} +</script> +""" + +chapter_footer_template = common_footer_template + chapter_citation_template +site_footer_template = common_footer_template + site_citation_template + +from nbdepend import get_text_contents, get_title + +def get_description(notebook): + """Return the first 2-4 sentences from a notebook file, after the title""" + contents = get_text_contents(notebook) + match = re.search(r'^# .*$([^#]*)^#', contents, re.MULTILINE) + if match is None: + desc = contents + else: + desc = match.group(1).replace(r'\n', '').replace('\n', '') + desc = re.sub(r"\]\([^)]*\)", "]", desc).replace('[', '').replace(']', '') + desc = re.sub(r"[_*]", "", desc) + # print("Description", desc.encode('utf-8')) + return desc + +def get_sections(notebook): + """Return the section titles from a notebook file""" + contents = get_text_contents(notebook) + matches = re.findall(r'^(# .*)', contents, re.MULTILINE) + if len(matches) >= 5: + # Multiple top sections (book?) - use these + pass + else: + # Use sections and subsections instead + matches = re.findall(r'^(###? .*)', contents, re.MULTILINE) + + sections = [match.replace(r'\n', '') for match in matches] + # print("Sections", repr(sections).encode('utf-8')) + + # Filter out second synopsis section + if '## Synopsis' in sections: + sections = ['## Synopsis'] + [sec for sec in sections if sec != '## Synopsis'] + + # Filter out "End of Excursion" titles + sections = [sec for sec in sections + if sec != '## End of Excursion' and sec != '### End of Excursion'] + + return sections + + +def anchor(title): + """Return an anchor '#a-title' for a title 'A title'""" + return '#' + title.replace(' ', '-') + + +def decorate(section, depth): + if depth != 2: + return section + + if section == "Synopsis": + section = '<i class="fa fa-fw fa-map"></i> ' + section + elif section == "Lessons Learned": + section = '<i class="fa fa-fw fa-trophy"></i> ' + section + elif section == "Next Steps": + section = '<i class="fa fa-fw fa-arrows"></i> ' + section + elif section == "Background": + section = '<i class="fa fa-fw fa-mortar-board"></i> ' + section + elif section == "Exercises": + section = '<i class="fa fa-fw fa-edit"></i> ' + section + else: + section = ' •   ' + section + + return section + +# Authors +def bibtex_escape(authors): + """Return list of authors in BibTeX-friendly form""" + tex_escape_table = { + "ä": r'{\"a}', + "ö": r'{\"o}', + "ü": r'{\"u}', + "Ä": r'{\"A}', + "Ö": r'{\"O}', + "Ü": r'{\"U}', + "ß": r'{\ss}' + } + return "".join(tex_escape_table.get(c,c) for c in authors) + +assert bibtex_escape("Böhme") == r'B{\"o}hme' + +authors_bibtex = bibtex_escape(authors).replace(", and ", " and ").replace(", ", " and ") + + +# The other way round +# Use "grep '\\' BIBFILE" to see accents currently in use +def bibtex_unescape(contents): + """Fix TeX escapes introduced by BibTeX""" + tex_unescape_table = { + r'{\"a}': "ä", + r'{\"o}': "ö", + r'{\"u}': "ü", + r'{\"i}': "ï", + r'{\"e}': "ë", + r'{\"A}': "Ä", + r'{\"O}': "Ö", + r'{\"U}': "Ü", + r'{\ss}': "ß", + r'{\`e}': "è", + r'{\'e}': "é", + r'{\`a}': "à", + r'{\'a}': "á", + r'{\`i}': "ì", + r'{\'i}': "í", + r'{\`o}': "ò", + r'{\'o}': "ó", + r'{\`u}': "ù", 
+ r'{\'u}': "ú", + r'{\d{s}}': "ṣ", + r'{\d{n}}': "ṇ", + r'{\d{t}}': "ṭ", + r'{\=a}': "ā", + r'{\=i}': "ī" + } + for key in tex_unescape_table: + contents = contents.replace(key, tex_unescape_table[key]) + return contents + +assert bibtex_unescape(r"B{\"o}hme") == 'Böhme' +assert bibtex_unescape(r"P{\`e}zze") == 'Pèzze' + + + +# Imports are in <span class="nn">NAME</span> +RE_IMPORT = re.compile(r'<span class="nn">([^<]+)</span>') + +# Add links to imports +def add_links_to_imports(contents, html_file): + imports = re.findall(RE_IMPORT, contents) + for module in imports: + link = None + if module.startswith("bookutils"): + link = "https://github.com/uds-se/fuzzingbook/tree/master/notebooks/bookutils" + elif module == "requests": + link = "http://docs.python-requests.org/en/master/" + elif module.startswith("IPython"): + # Point to IPython doc + link = "https://ipython.readthedocs.io/en/stable/api/generated/" + module + ".html" + elif module.startswith("selenium"): + # Point to Selenium doc + link = "https://selenium-python.readthedocs.io/" + elif module.startswith(project): + # Point to notebook + link = module[module.find('.') + 1:] + '.html' + elif module in ['debuggingbook', 'fuzzingbook']: + link = f"https://www.{module}.org/" + elif (module.startswith('debuggingbook') or + module.startswith('fuzzingbook')): + base = module[:module.find('.')] + submodule = module[module.find('.') + 1:] + link = f"https://www.{base}.org/html/{submodule}.html" + elif module in ['astor', 'pydriller', 'ipywidgets', 'graphviz']: + link = f'https://{module}.readthedocs.io/' + elif module in ['enforce', 'showast']: + link = f'https://pypi.org/project/{module}/' + elif module == 'magic': + link = 'https://pypi.org/project/python-magic/' + elif module == 'diff_match_patch': + link = 'https://github.com/google/diff-match-patch' + elif module == 'easyplotly': + link = 'https://mwouts.github.io/easyplotly/' + elif module == 'numpy': + link = 'https://numpy.org/' + elif module.startswith('matplotlib'): + link = 'https://matplotlib.org/' + elif module.startswith('plotly'): + link = 'https://plotly.com/python/' + elif module.startswith('sklearn'): + link = 'https://scikit-learn.org/' + elif module in ['ep', 'go', 'plt', 'np']: + link = None # aliases + elif module[0].islower(): + # Point to Python doc + link = "https://docs.python.org/3/library/" + module + ".html" + else: + # Point to notebook + link = module + '.html' + + # print(f'{module} -> ', repr(link)) + + if link and link.startswith('http'): + # Check whether link exists + try: + urllib.request.urlopen(link) + except urllib.error.HTTPError as exc: + if exc.code == 403: + # We get this when accessing readthedocs.io + pass + else: + print(f"{html_file}: Cannot find link {link} for {repr(module)}: {exc}", + file=sys.stderr) + link = None + except urllib.error.URLError as exc: + print(f"{html_file}: Cannot open {link} for {repr(module)}: {exc}", + file=sys.stderr) + link = None + + if link: + contents = contents.replace(r'<span class="nn">' + module + r'</span>', + r'<span class="nn"><a href="' + link + + r'" class="import" target="_blank">' + + module + r"</a>" + r'</span>') + + return contents + +# Remove cells that start with `# ignore` or only contain +# a quiz() or a display() call. Keep the output. 
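+#
+# For instance (illustrative cell, not taken from a real chapter): an input
+# cell rendered from
+#
+#     # ignore
+#     display(some_figure)
+#
+# loses its input box, while the figure it produced remains in the page.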
+RE_IGNORE = re.compile(r''' +<div class="input_code"> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> + +<div class="inner_cell"> +<div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span>(<span class="n">(quiz|display)</span>|<span class="c1">#\s*[iI]gnore[^<]*</span>).*? +</div> +</div></div> +</div> +</div> +''', re.DOTALL) + +def remove_ignored_code(text): + return RE_IGNORE.sub('', text) + +assert remove_ignored_code(''' +<div class="input_code"> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> + +<div class="inner_cell"> +<div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">quiz</span><span class="p">(</span><span class="s2">"From the difference between success and failure, we can already devise some observations about what's wrong with the output. Which of these can we turn into general hypotheses?"</span><span class="p">,</span> + <span class="p">[</span><span class="s2">"Double quotes are stripped from the tagged input."</span><span class="p">,</span> + <span class="s2">"Tags in double quotes are not stripped."</span><span class="p">,</span> + <span class="s2">"The tag '&lt;b&gt;' is always stripped from the input."</span><span class="p">,</span> + <span class="s2">"Four-letter words are stripped."</span><span class="p">],</span> <span class="p">[</span><span class="mi">298</span> <span class="o">%</span> <span class="mi">33</span><span class="p">,</span> <span class="mi">1234</span> <span class="o">%</span> <span class="mi">616</span><span class="p">])</span> +</pre></div> + +</div> +</div></div> +</div> +</div> +''') == '' + + +# Remove `# type: ignore` comments +RE_TYPE_IGNORE = re.compile(r' <span class="c1"># type: ignore</span>') +def remove_type_ignore(text): + return RE_TYPE_IGNORE.sub('', text) + + +# Sharing +def cgi_escape(text): + """Produce entities within text.""" + cgi_escape_table = { + " ": r"%20", + "&": r"%26", + '"': r"%22", + "'": r"%27", + ">": r"%3e", + "<": r"%3c", + ":": r"%3a", + "/": r"%2f", + "?": r"%3f", + "=": r"%3d", + } + return "".join(cgi_escape_table.get(c,c) for c in text) + + +# Highlight Synopsis +def highlight_synopsis(text): + synopsis_start = text.find('<h2 id="Synopsis">') + if synopsis_start < 0: + return text # No synopsis + + synopsis_end = text.find('<div class="input_markdown">', synopsis_start + 1) + if synopsis_end < 0: + return text # No synopsis + + text = (text[:synopsis_start] + + '<div class="synopsis">' + + text[synopsis_start:synopsis_end] + + '</div>\n\n' + + text[synopsis_end:]) + + # Strip original synopsis + orig_synopsis_start = text.find('<h2 id="Synopsis">', synopsis_end + 1) + orig_synopsis_end = text.find('<h2 ', orig_synopsis_start + 1) + + text = (text[:orig_synopsis_start] + text[orig_synopsis_end:]) + + return text + +# Fix CSS +def fix_css(text): + # Avoid forcing text color to black when printing + return text.replace('color: #000 !important;', '') + + +# Inline our SVG graphics +RE_IMG_SVG = re.compile(r'<img src="(PICS/[^"]*.svg)"[^>]*>') + +def inline_svg_graphics(text, chapter_html_file): + while True: + match = RE_IMG_SVG.search(text) + if not match: + break + + src = match.group(1) + svg_file = os.path.join(os.path.dirname(chapter_html_file), src) + svg_data = open(svg_file).read() + text = text[:match.start()] + svg_data + text[match.end():] + + return text + + +# Handle Excursions +# Cells with "Excursion: <summary>" and "End of Excursion" are translated to +# HTML <details> 
regions +RE_BEGIN_EXCURSION = re.compile(r''' +<div[^>]*?>[^<]*? # four divs +<div[^>]*?>[^<]*? +<div[^>]*?>[^<]*? +<div[^>]*?>[^<]*? +<h[0-9]\s*?(id="(?P<id>[^"]*)")[^>]*>Excursion:\s*\s(?P<title>[^\n]*?)(<a[^\n]*?>[^\n]*?</a>)?</h[0-9]> +</div>[^<]*? # four closing divs +</div>[^<]*? +</div>[^<]*? +</div>''', re.DOTALL | re.VERBOSE) + +RE_END_EXCURSION = re.compile(r''' +<div[^>]*?>[^<]*? # four divs +<div[^>]*?>[^<]*? +<div[^>]*?>[^<]*? +<div[^>]*?>[^<]*? +<h[0-9][^<>]*?>[eE]nd[^\n]*[eE]xcursion[^\n]*</h[0-9]> +</div>[^<]*? # four closing divs +</div>[^<]*? +</div>[^<]*? +</div>''', re.DOTALL | re.VERBOSE) + +def add_excursion_switchers(text): + text = RE_BEGIN_EXCURSION.sub( + r'<details id="\g<id>">\n<summary>\g<title></summary>', text) + text = RE_END_EXCURSION.sub( + '</details>', text) + return text + +text1 = ''' +Some stuff to begin with + +<div class="input_markdown"> +<div class="cell border-box-sizing text_cell rendered"> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"><h4 id="Excursion:-Implementing-display_tree()">Excursion: Implementing <code>display_tree()</code><a class="anchor-link" href="#Excursion:-Implementing-display_tree()">¶</a></h4></div> +</div> +</div> +</div> + +<div class="input_markdown"> +<div class="cell border-box-sizing text_cell rendered"> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"><p>We use the <code>dot</code> drawing program from the <code>graphviz</code> package algorithmically, traversing the above structure. (Unless you're deeply interested in tree visualization, you can directly skip to the example below.)</p> +</div> +</div> +</div> +</div> + +<div class="input_markdown"> +<div class="cell border-box-sizing text_cell rendered"> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"><h4 id="End-of-Excursion">End of Excursion<a class="anchor-link" href="#End-of-Excursion">¶</a></h4></div> +</div> +</div> +</div> + +<div class="input_markdown"> +<div class="cell border-box-sizing text_cell rendered"> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"><h4 id="Excursion:-Implementing-display_tree()">Excursion: Implementing <code>display_tree()</code> again<a class="anchor-link" href="#Excursion:-Implementing-display_tree()">¶</a></h4></div> +</div> +</div> +</div> + +Some standard stuff + +<div class="input_markdown"> +<div class="cell border-box-sizing text_cell rendered"> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"><h4 id="End-of-Excursion">End of Excursion<a class="anchor-link" href="#End-of-Excursion">¶</a></h4></div> +</div> +</div> +</div> + + +Some other stuff +''' + +# print(add_excursion_switchers(text1)) +# sys.exit(0) + + + +# Get template elements +chapter_html_file = args.chapter[0] +chapter = os.path.splitext(os.path.basename(chapter_html_file))[0] +chapter_notebook_file = os.path.join("notebooks", chapter + ".ipynb") +notebook_modification_time = os.path.getmtime(chapter_notebook_file) +notebook_modification_datetime = datetime.datetime.fromtimestamp(notebook_modification_time) \ + .astimezone().isoformat(sep=' ', timespec='seconds') +notebook_modification_year = repr(datetime.datetime.fromtimestamp(notebook_modification_time).year) + +# Get list of chapters +if args.public_chapters is not None: + public_chapters = args.public_chapters.split() +else: + public_chapters = [] + +if args.all_chapters is not None: + all_chapters = 
args.all_chapters.split() +else: + all_chapters = [] + +if args.include_ready and args.ready_chapters is not None: + ready_chapters = args.ready_chapters.split() +else: + ready_chapters = [] + +if args.include_todo and args.todo_chapters is not None: + todo_chapters = args.todo_chapters.split() +else: + todo_chapters = [] + +new_chapters = args.new_chapters.split() +beta_chapters = ready_chapters + todo_chapters +include_beta = args.include_ready or args.include_todo + +new_suffix = ' <strong class="new_chapter">•</strong>' +todo_suffix = '<i class="fa fa-fw fa-wrench"></i>' +ready_suffix = '<i class="fa fa-fw fa-warning"></i>' + +booktitle_beta = booktitle +if include_beta: + booktitle_beta += " " + todo_suffix + +menu_prefix = args.menu_prefix +if menu_prefix is None: + menu_prefix = "" + +if args.home: + header_template = site_header_template + footer_template = site_footer_template +else: + header_template = chapter_header_template + footer_template = chapter_footer_template + +# Popup menus +twitter_onclick = r""" +onclick="window.open('__SHARE_TWITTER__','popup','width=600,height=600'); return false;" +""" +facebook_onclick = r""" +onclick="window.open('__SHARE_FACEBOOK__','popup','width=600,height=600'); return false;" +""" +if args.home: + # Including the Twitter timeline already creates a popup + twitter_onclick = "" + +# Set base names +if include_beta: + site_html += "beta/" + +# Book image +bookimage = site_html + "html/PICS/wordcloud.png" + +# Binder +if include_beta: + notebook_html += "beta/" +notebook_html += "notebooks/" + +# Construct sections menu + +basename = os.path.splitext(os.path.basename(chapter_html_file))[0] +chapter_ipynb_file = os.path.join("notebooks", basename + ".ipynb") + +all_sections_menu = "" +sections = get_sections(chapter_ipynb_file) +current_depth = 1 + +for section in sections: + depth = section.count('#') + while section.startswith('#') or section.startswith(' '): + section = section[1:] + + if section.startswith('['): + section = section[1:section.find(']')] + + if depth == current_depth: + all_sections_menu += '</li>' + + if depth > current_depth: + all_sections_menu += "<ul>" * (depth - current_depth) + + if depth < current_depth: + all_sections_menu += "</ul></li>" * (current_depth - depth) + + all_sections_menu += '<li class="has-sub"><a href="%s">%s</a>\n' % (anchor(section), decorate(section, depth)) + current_depth = depth + +while current_depth > 1: + all_sections_menu += '</ul></li>' + current_depth -= 1 + + +# Construct chapter menu + +if args.home: + chapter_html = site_html + chapter_notebook_ipynb = notebook_html + "00_Table_of_Contents.ipynb" +else: + chapter_html = site_html + "html/" + basename + ".html" + chapter_notebook_ipynb = notebook_html + basename + ".ipynb" + +chapter_title = get_title(chapter_ipynb_file) +# if chapter_ipynb_file in new_chapters: +# chapter_title += " " + new_suffix + +chapter_title_beta = chapter_title +is_todo_chapter = include_beta and chapter_ipynb_file in todo_chapters +is_ready_chapter = include_beta and chapter_ipynb_file in ready_chapters +if is_todo_chapter: + chapter_title_beta += " " + todo_suffix +# if is_ready_chapter: +# chapter_title_beta += " " + ready_suffix + +if args.home: + link_class = ' class="this_page"' +else: + link_class = '' +all_chapters_menu = ''' +<li><a href="%s"%s><span class="part_number"><i class="fa fa-fw fa-home"></i></span> About this book</a></li> +<li><a href="__SITE_HTML__html/00_Table_of_Contents.html"><i class="fa fa-fw fa-sitemap"></i></span> Sitemap</a></li> 
+''' % (site_html, link_class) +structured_all_chapters_menu = all_chapters_menu + +this_chapter_counter = 1 +for counter, menu_ipynb_file in enumerate(all_chapters): + if menu_ipynb_file == chapter_ipynb_file: + this_chapter_counter = counter + +in_sublist = False +for counter, menu_ipynb_file in enumerate(all_chapters): + basename = os.path.splitext(os.path.basename(menu_ipynb_file))[0] + structured_title = '' # '<span class="chnum">' + repr(counter + 1) + '</span> ' + title = "" + + is_public = menu_ipynb_file in public_chapters + + if menu_ipynb_file == chapter_ipynb_file: + link_class = ' class="this_page"' + elif not is_public: + link_class = ' class="not_public"' + else: + link_class = '' + + file_title = get_title(menu_ipynb_file) + + if menu_ipynb_file in new_chapters: + file_title += new_suffix + + is_part = file_title.startswith("Part ") or file_title.startswith("Append") + if file_title.startswith("Part "): + file_title = '<span class="part_number">' + \ + file_title.replace("Part ", "") \ + .replace(":", '</span>') + # .replace("I:", 'Ⅰ') \ + # .replace("II:", 'Ⅱ') \ + # .replace("III:", 'Ⅲ') \ + # .replace("IV:", 'Ⅳ') \ + # .replace("V:", 'Ⅴ') \ + # .replace("VI:", 'Ⅵ') \ + # .replace("VII:", 'Ⅶ') \ + # .replace("VIII:", 'Ⅷ') \ + # .replace("IX:", 'Ⅸ') \ + # .replace("X:", 'Ⅹ') \ + # .replace("XI:", 'Ⅺ') \ + # .replace("XII:", 'Ⅻ') \ + # .replace(';', ';</span>') \ + + title += file_title + structured_title += file_title + + beta_indicator = '' + # if menu_ipynb_file in ready_chapters: + # beta_indicator = " " + ready_suffix + if menu_ipynb_file in todo_chapters: + beta_indicator = " " + todo_suffix + menu_html_file = menu_prefix + basename + ".html" + + if is_part: + # New part + if in_sublist: + structured_all_chapters_menu += "</ul>" + in_sublist = False + structured_all_chapters_menu += \ + '<li class="has-sub"><a href="%s" class="chapters">%s%s' \ + % (menu_html_file, file_title, beta_indicator) + structured_all_chapters_menu += ' <i class="fa fa-fw fa-caret-right"></i></a>\n<ul>\n' + in_sublist = True + else: + # New chapter + menu_link = menu_html_file if is_public else "#" + + structured_item = '<li><a href="%s"%s>%s%s</a></li>\n' % \ + (menu_link, link_class, structured_title, beta_indicator) + + structured_all_chapters_menu += structured_item + + item = '<li><a href="%s"%s>%s%s</a></li>\n' % \ + (menu_link, link_class, title, beta_indicator) + all_chapters_menu += item + +if in_sublist: + structured_all_chapters_menu += "</ul>" + in_sublist = False + +# Description +description = html.escape(get_description(chapter_ipynb_file)) + +# Exercises +end_of_exercise = ''' +<p><div class="solution_link"><a href="__CHAPTER_NOTEBOOK_IPYNB__#Exercises" target=_blank>Use the notebook</a> to work on the exercises and see solutions.</div></p> +''' + +if args.home: + share_message = (r'I just read "' + booktitle + + rf'" ({twitter}) at ' + site_html) + share_title = booktitle +else: + share_message = (r'I just read "' + chapter_title + + rf'" (part of {twitter}) at ' + chapter_html) + share_title = chapter_title + +share_twitter = "https://twitter.com/intent/tweet?text=" + cgi_escape(share_message) +share_facebook = "https://www.facebook.com/sharer/sharer.php?u=" + cgi_escape(chapter_html) +share_mail = ("mailto:?subject=" + cgi_escape(share_title) + + "&body=" + cgi_escape(share_message)) + +# Page title +if args.home: + page_title = booktitle +else: + page_title = chapter_title + " - " + booktitle + +# sys.exit(0) + +# Read it in +print("Reading", chapter_html_file) 
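+# Illustration only (the HTML snippet below is made up): the templates above
+# rely on double-underscore placeholders, which the chain that follows expands
+# by plain string replacement. Assuming project = "fuzzingbook" and a
+# non-beta build:
+#
+#     >>> "<a href='__SITE_HTML__'>__PROJECT__</a>" \
+#     ...     .replace("__SITE_HTML__", site_html).replace("__PROJECT__", project)
+#     "<a href='https://www.fuzzingbook.org/'>fuzzingbook</a>"
+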
+chapter_contents = open(chapter_html_file, encoding="utf-8").read() + +# Replacement orgy +# 1. Replace all markdown links to .ipynb by .html, such that cross-chapter links work +# 2. Fix extra newlines in cell output produced by ipypublish +# 3. Insert the menus and templates as defined above +chapter_contents = chapter_contents \ + .replace("\n\n</pre>", "\n</pre>") \ + .replace("<__HEADER__>", header_template) \ + .replace("<__FOOTER__>", footer_template) \ + .replace("<__ALL_CHAPTERS_MENU__>", all_chapters_menu) \ + .replace("<__STRUCTURED_ALL_CHAPTERS_MENU__>", structured_all_chapters_menu) \ + .replace("<__ALL_SECTIONS_MENU__>", all_sections_menu) \ + .replace("<__END_OF_EXERCISE__>", end_of_exercise) \ + .replace("__PROJECT__", project) \ + .replace("__PAGE_TITLE__", page_title) \ + .replace("__BOOKTITLE_BETA__", booktitle_beta) \ + .replace("__BOOKTITLE__", booktitle) \ + .replace("__BOOKIMAGE__", bookimage) \ + .replace("__DESCRIPTION__", description) \ + .replace("__AUTHORS__", authors) \ + .replace("__CHAPTER__", chapter) \ + .replace("__CHAPTER_TITLE__", chapter_title) \ + .replace("__CHAPTER_TITLE_BETA__", chapter_title_beta) \ + .replace("__CHAPTER_HTML__", chapter_html) \ + .replace("__SITE_HTML__", site_html) \ + .replace("__NOTEBOOK_HTML__", notebook_html) \ + .replace("__CHAPTER_NOTEBOOK_IPYNB__", chapter_notebook_ipynb) \ + .replace("__GITHUB_HTML__", github_html) \ + .replace("__TWITTER_ONCLICK__", twitter_onclick) \ + .replace("__FACEBOOK_ONCLICK__", facebook_onclick) \ + .replace("__SHARE_TWITTER__", share_twitter) \ + .replace("__SHARE_FACEBOOK__", share_facebook) \ + .replace("__SHARE_MAIL__", share_mail) \ + .replace("__DATE__", notebook_modification_datetime) \ + .replace("__YEAR__", notebook_modification_year) \ + .replace("__BIBTEX_KEY__", project + notebook_modification_year) + +# Remove code cells that only display graphics or start with `#ignore` +chapter_contents = remove_ignored_code(chapter_contents) + +# Remove `# type: ignore` comments +chapter_contents = remove_type_ignore(chapter_contents) + +# Add links to imports +chapter_contents = add_links_to_imports(chapter_contents, chapter_html_file) + +# Inline SVG graphics (preserving style and tooltips) +chapter_contents = inline_svg_graphics(chapter_contents, chapter_html_file) + +# Fix simple .ipynb links within text and XML +if args.home: + chapter_contents = re.sub(r'<a (xlink:href|href)="([a-zA-Z0-9_]*)\.ipynb', + r'<a \1="html/\2.html', chapter_contents) +else: + chapter_contents = re.sub(r'<a (xlink:href|href)="([a-zA-Z0-9_]*)\.ipynb', + r'<a \1="\2.html', chapter_contents) + +# Recode TeX accents imported from .bib +chapter_contents = bibtex_unescape(chapter_contents) + +# Expand BibTeX authors at the end, because Marcel needs his Umlaut encoded +chapter_contents = \ + chapter_contents.replace("__AUTHORS_BIBTEX__", authors_bibtex) + +# Highlight details switchers +chapter_contents = add_excursion_switchers(chapter_contents) + +# Fix CSS +chapter_contents = fix_css(chapter_contents) + +# Handle the (first) synopsis +chapter_contents = highlight_synopsis(chapter_contents) + +# Get proper links for CSS and Favicon +if args.home: + chapter_contents = chapter_contents.replace("custom.css", menu_prefix + "custom.css") + chapter_contents = chapter_contents.replace("favicon/", menu_prefix + "favicon/") + +# Get a title +# The official way is to set a title in document metadata, +# but a) Jupyter Lab can't edit it, and b) the title conflicts with the chapter header - AZ +chapter_contents = 
re.sub(r"<title>.*", + "" + page_title + "", chapter_contents, 1) + +beta_warning = None +if is_todo_chapter: + beta_warning = '

' + todo_suffix + ' This chapter is work in progress ("beta"). It is incomplete and may change at any time.

' +elif is_ready_chapter: + beta_warning = '

' + ready_suffix + ' This chapter is still under review ("beta"). It may change at any time.

' + +if beta_warning is not None: + chapter_contents = chapter_contents.replace("", "" + beta_warning) + +# And write it out again +print("Writing", chapter_html_file) +open(chapter_html_file, mode="w", encoding="utf-8").write(chapter_contents) diff --git a/notebooks/shared/utils/post_tex b/notebooks/shared/utils/post_tex new file mode 100755 index 000000000..8b1c93a47 --- /dev/null +++ b/notebooks/shared/utils/post_tex @@ -0,0 +1,12 @@ +#!/bin/sh +# Fix LaTeX output generated by nbpublish + +# Usage: post-tex INPUT.tex > OUTPUT.tex + +# Fix `#' escapes in \href{} and \url{} + +cat "$@" | +sed '/ *\\href{/s/\\#/#/g' | +sed '/ *\\url{/s/\\#/#/g' + + diff --git a/shared b/shared new file mode 120000 index 000000000..56a662daa --- /dev/null +++ b/shared @@ -0,0 +1 @@ +notebooks/shared \ No newline at end of file
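
For reference, a minimal before/after sketch of the post_tex filter above (illustrative line, not taken from real nbpublish output):

    input:  \href{https://example.org/\#sec}{link}
    output: \href{https://example.org/#sec}{link}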