diff --git a/compiler/cli.py b/compiler/cli.py
new file mode 100644
index 000000000..7f9a3db6b
--- /dev/null
+++ b/compiler/cli.py
@@ -0,0 +1,324 @@
+import argparse
+import os
+
+
+class BaseParser(argparse.ArgumentParser):
+    """
+    Base class for all Argument Parsers used by PaSh. It has three flags
+    by default: output_time (obsolete), debug and log_file.
+
+    Other flags are available by classes which inherit BaseParser
+    """
+
+    @staticmethod
+    def _get_width():
+        """
+        Default data-parallelism factor: cpus // 8 when at least 16 CPUs are
+        reported, 2 otherwise (including when the CPU count is unknown).
+        """
+        # os.cpu_count() may return None; fall back instead of asserting,
+        # since asserts are stripped under `python -O`.
+        cpus = os.cpu_count() or 0
+        return cpus // 8 if cpus >= 16 else 2
+
+    def __init__(self, *args, **kwargs):
+        """Register the flags every PaSh parser accepts: -t, -d and --log_file."""
+        super().__init__(*args, **kwargs)
+        self.add_argument(
+            "-t",
+            "--output_time",  # FIXME: --time
+            help="(obsolete, time is always logged now) output the time it took for every step",
+            action="store_true",
+        )
+        self.add_argument(
+            "-d",
+            "--debug",
+            type=int,
+            help="configure debug level; defaults to 0",
+            default=0,
+        )
+        self.add_argument(
+            "--log_file",
+            help="configure where to write the log; defaults to stderr.",
+            default="",
+        )
+
+    def add_pash_args(self):
+        """Add the PaSh flags, including the experimental and obsolete ones."""
+        self.add_argument(
+            "-w",
+            "--width",
+            type=int,
+            default=self._get_width(),
+            help="set data-parallelism factor",
+        )
+        self.add_argument(
+            "--no_optimize",
+            help="not apply transformations over the DFG",
+            action="store_true",
+        )
+        self.add_argument(
+            "--dry_run_compiler",
+            help="not execute the compiled script, even if the compiler succeeded",
+            action="store_true",
+        )
+        self.add_argument(
+            "--assert_compiler_success",
+            help="assert that the compiler succeeded (used to make tests more robust)",
+            action="store_true",
+        )
+        self.add_argument(
+            "--avoid_pash_runtime_completion",
+            help="avoid the pash_runtime execution completion (only relevant when --debug > 0)",
+            action="store_true",
+        )
+        self.add_argument(
+            "-p",
+            "--output_optimized",  # FIXME: --print
+            help="output the parallel shell script for inspection",
+            action="store_true",
+        )
+        self.add_argument(
+            "--graphviz",
+            help="generates graphical representations of the dataflow graphs. The option argument corresponds to the format. PaSh stores them in a timestamped directory in the argument of --graphviz_dir",
+            choices=["no", "dot", "svg", "pdf", "png"],
+            default="no",
+        )
+        ## TODO: To discuss: Do we maybe want to have graphviz to always be included
+        ##       in the temp directory (under a graphviz subdirectory) instead of in its own?
+        ##   kk: I think that ideally we want a log-directory where we can put logs, graphviz,
+        ##       and other observability and monitoring info (instead of putting them in the temp).
+        self.add_argument(
+            "--graphviz_dir",
+            help="the directory in which to store graphical representations",
+            default="/tmp",
+        )
+        self.add_argument(
+            "--no_parallel_pipelines",
+            help="Disable parallel running of independent pipelines",
+            action="store_true",
+            default=False,
+        )
+        self.add_argument(
+            "--parallel_pipelines_limit",
+            help="Maximum number of parallel independent pipelines",
+            type=int,
+            default=2,
+        )
+        self.add_argument(
+            "--r_split_batch_size",
+            type=int,
+            help="configure the batch size of r_split (default: 1MB)",
+            default=1000000,
+        )
+        self.add_argument(
+            "--config_path",
+            help="determines the config file path. By default it is 'PASH_TOP/compiler/config.yaml'.",
+            default="",
+        )
+        self.add_argument(
+            "--version",
+            action="version",
+            version="%(prog)s {version}".format(
+                version="0.12.2"
+            ),  # What does this version mean?
+        )
+
+        self.add_experimental_args()
+        self.add_obsolete_args()
+
+    def add_obsolete_args(self):
+        """Add flags that are still accepted for old interfaces but are obsolete."""
+        self.add_argument(
+            "--no_daemon",
+            help="(obsolete) does nothing -- Run the compiler everytime we need a compilation instead of using the daemon",
+            action="store_true",
+            default=False,
+        )
+        self.add_argument(
+            "--parallel_pipelines",
+            help="(obsolete) Run multiple pipelines in parallel if they are safe to run. Now true by default. See --no_parallel_pipelines.",
+            action="store_true",
+            default=True,
+        )
+        self.add_argument(
+            "--r_split",
+            help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)",
+            action="store_true",
+        )
+        self.add_argument(
+            "--dgsh_tee",
+            help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)",
+            action="store_true",
+        )
+        self.add_argument(
+            "--speculation",
+            help="(obsolete) does nothing -- run the original script during compilation; if compilation succeeds, abort the original and run only the parallel (quick_abort) (Default: no_spec)",
+            choices=["no_spec", "quick_abort"],
+            default="no_spec",
+        )
+
+    def add_experimental_args(self):
+        """Add flags that enable or tune experimental features."""
+        self.add_argument(
+            "--no_eager",
+            help="(experimental) disable eager nodes before merging nodes",
+            action="store_true",
+        )
+        self.add_argument(
+            "--profile_driven",
+            help="(experimental) use profiling information when optimizing",
+            action="store_true",
+        )
+        self.add_argument(
+            "--speculative",
+            help="(experimental) use the speculative execution preprocessing and runtime (NOTE: this has nothing to do with --speculation, which is actually misnamed, and should be named concurrent compilation/execution and is now obsolete)",
+            action="store_true",
+            default=False,
+        )
+        self.add_argument(
+            "--termination",
+            help="(experimental) determine the termination behavior of the DFG. Defaults to cleanup after the last process dies, but can drain all streams until depletion",
+            choices=["clean_up_graph", "drain_stream"],
+            default="clean_up_graph",
+        )
+        self.add_argument(
+            "--daemon_communicates_through_unix_pipes",
+            help="(experimental) the daemon communicates through unix pipes instead of sockets",
+            action="store_true",
+        )
+        self.add_argument(
+            "--distributed_exec",
+            help="(experimental) execute the script in a distributed environment. Remote machines should be configured and ready",
+            action="store_true",
+            default=False,
+        )
+
+
+class RunnerParser(BaseParser):
+    """
+    Parser for the PaSh Runner in compiler/pash.py
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.add_pash_args()
+
+        self.add_argument(
+            "input",
+            nargs="*",
+            help="the script to be compiled and executed (followed by any command-line arguments)",
+        )
+        self.add_argument(
+            "--preprocess_only",
+            help="only preprocess the input script and not execute it",
+            action="store_true",
+        )
+        self.add_argument(
+            "--output_preprocessed",
+            help=" output the preprocessed script",
+            action="store_true",
+        )
+        self.add_argument(
+            "--interactive",
+            help="Executes the script using an interactive internal shell session (experimental)",
+            action="store_true",
+        )
+        self.add_argument(
+            "-c",
+            "--command",
+            help="Evaluate the following as a script, rather than a file",
+            default=None,
+        )
+        ## This is not the correct way to parse these, because more than one option can be given together, e.g., -ae
+        self.add_argument(
+            "-a",
+            help="Enabling the `allexport` shell option",
+            action="store_true",
+            default=False,
+        )
+        self.add_argument(
+            "+a",
+            help="Disabling the `allexport` shell option",
+            action="store_false",
+            default=False,
+        )
+        ## These two are here for compatibility with respect to bash
+        self.add_argument(
+            "-v",
+            help="(experimental) prints shell input lines as they are read",
+            action="store_true",
+        )
+        self.add_argument(
+            "-x",
+            help="(experimental) prints commands and their arguments as they execute",
+            action="store_true",
+        )
+        self.set_defaults(preprocess_mode="pash")
+
+
+class CompilerParser(BaseParser):
+    """
+    Parser for the PaSh compiler in compiler/pash_compiler.py
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.add_pash_args()
+
+        self.add_argument(
+            "compiled_script_file",
+            help="the file in which to output the compiled script",
+        )
+        self.add_argument(
+            "input_ir",
+            help="the file containing the dataflow graph to be optimized and executed",
+        )
+        self.add_argument(
+            "--var_file",
+            help="determines the path of a file containing all shell variables.",
+            default=None,
+        )
+
+
+class PreprocessorParser(BaseParser):
+    """
+    Parser for the preprocessor in compiler/preprocessor/preprocessor.py
+    Generates two subparsers: `pash` and `spec` (both are plain BaseParsers)
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # add_subparsers() defaults parser_class to type(self); every add_parser()
+        # call would then build another PreprocessorParser and recurse forever,
+        # so the sub-commands are explicitly created as plain BaseParsers.
+        subparser = self.add_subparsers(
+            help="sub-command help", parser_class=BaseParser
+        )
+        self.add_pash_subparser(subparser)
+        self.add_spec_subparser(subparser)
+
+    @staticmethod
+    def add_pash_subparser(subparser):
+        """Add the `pash` sub-command: all PaSh flags plus the input script."""
+        parser_pash = subparser.add_parser(
+            "pash", help="Preprocess the script so that it can be run with PaSh"
+        )
+        parser_pash.add_pash_args()
+        parser_pash.add_argument("input", help="the script to be preprocessed")
+        parser_pash.set_defaults(preprocess_mode="pash")
+
+    @staticmethod
+    def add_spec_subparser(subparser):
+        """Add the `spec` sub-command: input script and partial-order file."""
+        parser_spec = subparser.add_parser(
+            "spec", help="Preprocess the script so that it can be run with speculation"
+        )
+        parser_spec.add_argument("input", help="the script to be preprocessed")
+
+        ## TODO: When we better integrate, this should be automatically set.
+        parser_spec.add_argument(
+            "partial_order_file",
+            help="the file to store the partial order (currently just a sequence)",
+        )
+        parser_spec.set_defaults(preprocess_mode="spec")
diff --git a/compiler/config.py b/compiler/config.py index 618d7e676..1a4abdea6 100644 --- a/compiler/config.py +++ b/compiler/config.py @@ -2,7 +2,6 @@ import logging import os import subprocess -import math from util import * @@ -62,7 +61,6 @@ def set_config_globals_from_pash_args(given_pash_args): global pash_args, OUTPUT_TIME, DEBUG_LEVEL, LOG_FILE pash_args = given_pash_args - OUTPUT_TIME = pash_args.output_time DEBUG_LEVEL = pash_args.debug LOG_FILE = pash_args.log_file @@ -113,178 +111,6 @@ def load_config(config_file_path=""): config = pash_config -def getWidth(): - cpus = os.cpu_count() - return math.floor(cpus / 8) if cpus >= 16 else 2 - - -def add_general_config_arguments(parser): - ## TODO: Delete that at some point, or make it have a different use (e.g., outputting time even without -d 1). - parser.add_argument( - "-t", - "--output_time", # FIXME: --time - help="(obsolete, time is always logged now) output the time it took for every step", - action="store_true", - ) - parser.add_argument( - "-d", - "--debug", - type=int, - help="configure debug level; defaults to 0", - default=0, - ) - parser.add_argument( - "--log_file", - help="configure where to write the log; defaults to stderr.", - default="", - ) - - -## These are arguments that are common to pash.py and pash_compiler.py -def add_common_arguments(parser): - add_general_config_arguments(parser) - - parser.add_argument( - "-w", - "--width", - type=int, - default=getWidth(), - help="set data-parallelism factor", - ) - parser.add_argument( - "--no_optimize", - help="not apply transformations over the DFG", - action="store_true", - ) - parser.add_argument( - "--dry_run_compiler", - help="not execute the compiled script, even if the compiler succeeded", - action="store_true", - ) - parser.add_argument( - 
"--assert_compiler_success", - help="assert that the compiler succeeded (used to make tests more robust)", - action="store_true", - ) - parser.add_argument( - "--avoid_pash_runtime_completion", - help="avoid the pash_runtime execution completion (only relevant when --debug > 0)", - action="store_true", - ) - parser.add_argument( - "--profile_driven", - help="(experimental) use profiling information when optimizing", - action="store_true", - ) - parser.add_argument( - "-p", - "--output_optimized", # FIXME: --print - help="output the parallel shell script for inspection", - action="store_true", - ) - parser.add_argument( - "--graphviz", - help="generates graphical representations of the dataflow graphs. The option argument corresponds to the format. PaSh stores them in a timestamped directory in the argument of --graphviz_dir", - choices=["no", "dot", "svg", "pdf", "png"], - default="no", - ) - ## TODO: To discuss: Do we maybe want to have graphviz to always be included - ## in the temp directory (under a graphviz subdirectory) instead of in its own? - ## kk: I think that ideally we want a log-directory where we can put logs, graphviz, - ## and other observability and monitoring info (instead of putting them in the temp). - parser.add_argument( - "--graphviz_dir", - help="the directory in which to store graphical representations", - default="/tmp", - ) - parser.add_argument( - "--no_eager", - help="(experimental) disable eager nodes before merging nodes", - action="store_true", - ) - parser.add_argument( - "--no_daemon", - help="(obsolete) does nothing -- Run the compiler everytime we need a compilation instead of using the daemon", - action="store_true", - default=False, - ) - parser.add_argument( - "--parallel_pipelines", - help="(obsolete) Run multiple pipelines in parallel if they are safe to run. Now true by default. 
See --no_parallel_pipelines.", - action="store_true", - default=True, - ) - parser.add_argument( - "--no_parallel_pipelines", - help="Disable parallel running of independent pipelines", - action="store_true", - default=False, - ) - parser.add_argument( - "--parallel_pipelines_limit", - help="Maximum number of parallel independent pipelines", - type=int, - default=2, - ) - parser.add_argument( - "--r_split_batch_size", - type=int, - help="configure the batch size of r_split (default: 1MB)", - default=1000000, - ) - parser.add_argument( - "--r_split", - help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", - action="store_true", - ) - parser.add_argument( - "--dgsh_tee", - help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", - action="store_true", - ) - parser.add_argument( - "--speculative", - help="(experimental) use the speculative execution preprocessing and runtime (NOTE: this has nothing to do with --speculation, which is actually misnamed, and should be named concurrent compilation/execution and is now obsolete)", - action="store_true", - default=False, - ) - ## This is misnamed, it should be named concurrent compilation/execution - parser.add_argument( - "--speculation", - help="(obsolete) does nothing -- run the original script during compilation; if compilation succeeds, abort the original and run only the parallel (quick_abort) (Default: no_spec)", - choices=["no_spec", "quick_abort"], - default="no_spec", - ) - parser.add_argument( - "--termination", - help="(experimental) determine the termination behavior of the DFG. 
Defaults to cleanup after the last process dies, but can drain all streams until depletion", - choices=["clean_up_graph", "drain_stream"], - default="clean_up_graph", - ) - parser.add_argument( - "--daemon_communicates_through_unix_pipes", - help="(experimental) the daemon communicates through unix pipes instead of sockets", - action="store_true", - ) - parser.add_argument( - "--distributed_exec", - help="(experimental) execute the script in a distributed environment. Remote machines should be configured and ready", - action="store_true", - default=False, - ) - parser.add_argument( - "--config_path", - help="determines the config file path. By default it is 'PASH_TOP/compiler/config.yaml'.", - default="", - ) - parser.add_argument( - "--version", - action="version", - version="%(prog)s {version}".format(version=__version__), - ) - return - - def pass_common_arguments(pash_arguments): arguments = [] if pash_arguments.no_optimize: @@ -297,8 +123,6 @@ def pass_common_arguments(pash_arguments): arguments.append("--avoid_pash_runtime_completion") if pash_arguments.profile_driven: arguments.append("--profile_driven") - if pash_arguments.output_time: - arguments.append("--output_time") if pash_arguments.output_optimized: arguments.append("--output_optimized") arguments.append("--graphviz") diff --git a/compiler/pash.py b/compiler/pash.py index 627da39af..6554bcc1b 100755 --- a/compiler/pash.py +++ b/compiler/pash.py @@ -1,19 +1,14 @@ import sys import os import subprocess -import argparse -from datetime import datetime - -from shell_ast import ast_to_ast from ir import * -from parse import parse_shell_to_asts_interactive from pash_graphviz import maybe_init_graphviz_dir from preprocessor.preprocessor import preprocess from speculative import util_spec from util import * import config -import shutil +from cli import RunnerParser LOGGING_PREFIX = "PaSh: " @@ -72,69 +67,7 @@ def parse_args(): if "PASH_FROM_SH" in os.environ: prog_name = os.environ["PASH_FROM_SH"] ## We need 
to set `+` as a prefix char too - parser = argparse.ArgumentParser(prog_name, prefix_chars="-+") - parser.add_argument( - "input", - nargs="*", - help="the script to be compiled and executed (followed by any command-line arguments", - ) - parser.add_argument( - "--preprocess_only", - help="only preprocess the input script and not execute it", - action="store_true", - ) - parser.add_argument( - "--output_preprocessed", - help=" output the preprocessed script", - action="store_true", - ) - parser.add_argument( - "--interactive", - help="Executes the script using an interactive internal shell session (experimental)", - action="store_true", - ) - parser.add_argument( - "-c", - "--command", - help="Evaluate the following as a script, rather than a file", - default=None, - ) - ## This is not the correct way to parse these, because more than one option can be given together, e.g., -ae - parser.add_argument( - "-a", - help="Enabling the `allexport` shell option", - action="store_true", - default=False, - ) - parser.add_argument( - "+a", - help="Disabling the `allexport` shell option", - action="store_false", - default=False, - ) - ## These two are here for compatibility with respect to bash - parser.add_argument( - "-v", - help="(experimental) prints shell input lines as they are read", - action="store_true", - ) - parser.add_argument( - "-x", - help="(experimental) prints commands and their arguments as they execute", - action="store_true", - ) - ## Deprecated argument... 
keeping here just to output the message - ## TODO: Do that with a custom argparse Action (KK: I tried and failed) - parser.add_argument( - "--expand_using_bash_mirror", - help="DEPRECATED: instead of expanding using the internal expansion code, expand using a bash mirror process (slow)", - action="store_true", - ) - - ## Set the preprocessing mode to PaSh - parser.set_defaults(preprocess_mode="pash") - - config.add_common_arguments(parser) + parser = RunnerParser(prog_name, prefix_chars="-+") args = parser.parse_args() config.set_config_globals_from_pash_args(args) @@ -159,13 +92,6 @@ def parse_args(): log(arg_name, arg_val) log("-" * 40) - ## Print the deprecated argument - if args.expand_using_bash_mirror: - log( - "WARNING: Option --expand_using_bash_mirror is deprecated and is *ignored*.", - level=0, - ) - ## TODO: We might need to have a better default (like $0 of pa.sh) shell_name = "pash" diff --git a/compiler/pash_compilation_server.py b/compiler/pash_compilation_server.py index bcc6d3279..51f531574 100644 --- a/compiler/pash_compilation_server.py +++ b/compiler/pash_compilation_server.py @@ -1,6 +1,4 @@ -import argparse import signal -import traceback from threading import Thread from datetime import datetime, timedelta @@ -15,6 +13,8 @@ from dspash.worker_manager import WorkersManager import server_util +from cli import BaseParser + ## ## A Daemon (not with the strict Unix sense) ## that responds to requests for compilation @@ -30,9 +30,9 @@ def handler(signum, frame): def parse_args(): - parser = argparse.ArgumentParser(add_help=False) - config.add_common_arguments(parser) - args, unknown_args = parser.parse_known_args() + parser = BaseParser(add_help=False) + parser.add_pash_args() + args, _ = parser.parse_known_args() return args diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 6b4e6829a..c4fc7282e 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -1,13 +1,8 @@ -import argparse import sys import pickle 
import traceback from datetime import datetime -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorKind import ( - AggregatorKindEnum, -) - from sh_expand import env_vars_util import config @@ -19,11 +14,9 @@ from definitions.ir.aggregator_node import * -from definitions.ir.dfg_node import DFGNode from definitions.ir.nodes.eager import * from definitions.ir.nodes.pash_split import * -import definitions.ir.nodes.r_merge as r_merge import definitions.ir.nodes.r_split as r_split import definitions.ir.nodes.r_unwrap as r_unwrap import definitions.ir.nodes.dgsh_tee as dgsh_tee @@ -32,6 +25,8 @@ # Distirbuted Exec import dspash.hdfs_utils as hdfs_utils +from cli import CompilerParser + runtime_config = {} @@ -74,21 +69,8 @@ def main_body(): def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "compiled_script_file", help="the file in which to output the compiled script" - ) - parser.add_argument( - "input_ir", - help="the file containing the dataflow graph to be optimized and executed", - ) - parser.add_argument( - "--var_file", - help="determines the path of a file containing all shell variables.", - default=None, - ) - config.add_common_arguments(parser) - args, unknown_args = parser.parse_known_args() + parser = CompilerParser() + args, _ = parser.parse_known_args() return args diff --git a/compiler/preprocessor/preprocessor.py b/compiler/preprocessor/preprocessor.py index 11139e17b..817aeaf84 100644 --- a/compiler/preprocessor/preprocessor.py +++ b/compiler/preprocessor/preprocessor.py @@ -1,14 +1,13 @@ -import argparse from datetime import datetime import os import config from shell_ast import transformation_options, ast_to_ast -from ir import FileIdGen from parse import parse_shell_to_asts, from_ast_objects_to_shell from util import * import server_util from speculative import util_spec +from cli import PreprocessorParser LOGGING_PREFIX = "PaSh Preprocessor: " @@ -82,36 +81,8 @@ def 
preprocess_asts(ast_objects, args): return preprocessed_asts -## -## This is the command line interface for the preprocessor -## def main(): - parser = argparse.ArgumentParser() - config.add_general_config_arguments(parser) - - subparsers = parser.add_subparsers(help="sub-command help") - - # create the parser for the "a" command - parser_pash = subparsers.add_parser( - "pash", help="Preprocess the script so that it can be run with PaSh" - ) - config.add_common_arguments(parser_pash) - parser_pash.add_argument("input", help="the script to be preprocessed") - parser_pash.set_defaults(preprocess_mode="pash") - - # create the parser for the "b" command - parser_spec = subparsers.add_parser( - "spec", help="Preprocess the script so that it can be run with speculation" - ) - parser_spec.add_argument("input", help="the script to be preprocessed") - - ## TODO: When we better integrate, this should be automatically set. - parser_spec.add_argument( - "partial_order_file", - help="the file to store the partial order (currently just a sequence)", - ) - parser_spec.set_defaults(preprocess_mode="spec") - + parser = PreprocessorParser() args = parser.parse_args() config.set_config_globals_from_pash_args(args) diff --git a/docs/README.md b/docs/README.md index fe87af96c..bae31490b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,12 +1,12 @@ # PaSh Documentation -Quick Jump: [using pash](#using-pash) | [videos](#videos--video-presentations) | [papers](#academic-papers--events) +Quick Jump: [using pash](#using-pash) | [videos](#videos--video-presentations) | [papers](#academic-papers--events) ## Using PaSh The following resources offer overviews of important PaSh components. 
-* Short tutorial: [introduction](./tutorial#introduction), [installation](./install#installation), [execution](./tutorial#running-scripts), and [next steps](./tutorial#what-next) -* Annotations: [parallelizability](../annotations#main-parallelizability-classes), [study](../annotations#parallelizability-study-of-commands-in-gnu--posix), [example 1](../annotations#a-simple-example-chmod), [example 2](../annotations#another-example-cut), [howto](../annotations#how-to-annotate-a-command) +* Short tutorial: [introduction](tutorial.md#introduction), [installation](install.md#installation), [execution](tutorial.md#running-scripts), and [next steps](tutorial.md#what-next) + * Compiler: [intro](../compiler#introduction), [overview](../compiler#compiler-overview), [details](../compiler#zooming-into-fragments), [earlier versions](../compiler#earlier-versions) * Runtime: [split](../runtime#stream-splitting), [eager](../runtime#eager-stream-polling), [cleanup](../runtime#cleanup-logic), [aggregate](../runtime#aggregators) * Scripts: [one-liners](../evaluation/benchmarks/#common-unix-one-liners), [unix50](../evaluation/benchmarks/#unix-50-from-bell-labs), [weather analysis](../evaluation/benchmarks/#noaa-weather-analysis), [web indexing](../evaluation/benchmarks/#wikipedia-web-indexing) @@ -23,18 +23,18 @@ The following presentations offer short PaSh introductions: Academic papers, presentations, and other events related to PaSh. 
-**An Order-aware Dataflow Model for Parallel Unix Pipelines** -Shivam Handa*, Konstantinos Kallas*, Nikos Vasilakis*, Martin Rinard +**An Order-aware Dataflow Model for Parallel Unix Pipelines** +Shivam Handa*, Konstantinos Kallas*, Nikos Vasilakis*, Martin Rinard [pdf](https://arxiv.org/pdf/2012.15422.pdf) | [doi](https://doi.org/10.1145/3473570) | [event](https://icfp21.sigplan.org/) -**The Future of the Shell: UNIX and beyond** -Michael Greenberg*, Konstantinos Kallas*, Nikos Vasilakis* +**The Future of the Shell: UNIX and beyond** +Michael Greenberg*, Konstantinos Kallas*, Nikos Vasilakis* [pdf](https://fut-shell.github.io/panel-summary.pdf) | [doi](https://doi.org/10.1145/3458336.3465296) | [event](https://sigops.org/s/conferences/hotos/2021/#program) -**UNIX Shell Programming: The Next 50 Years** -Michael Greenberg*, Konstantinos Kallas*, Nikos Vasilakis* +**UNIX Shell Programming: The Next 50 Years** +Michael Greenberg*, Konstantinos Kallas*, Nikos Vasilakis* [pdf](https://dl.acm.org/doi/pdf/10.1145/3458336.3465294) | [doi](https://doi.org/10.1145/3458336.3465294) | [event](https://sigops.org/s/conferences/hotos/2021/#program) -**PaSh: Light-touch Data-Parallel Shell Processing** -Nikos Vasilakis*, Konstantinos Kallas*, Konstantinos Mamouras, Achilles Benetopoulos, Lazar Cvetković +**PaSh: Light-touch Data-Parallel Shell Processing** +Nikos Vasilakis*, Konstantinos Kallas*, Konstantinos Mamouras, Achilles Benetopoulos, Lazar Cvetković [pdf](https://dl.acm.org/doi/pdf/10.1145/3447786.3456228) | [doi](https://doi.org/10.1145/3447786.3456228) | [event](https://2021.eurosys.org/) diff --git a/docs/tooling/ci.md b/docs/ci.md similarity index 95% rename from docs/tooling/ci.md rename to docs/ci.md index b97b7419c..6d8f0ca36 100644 --- a/docs/tooling/ci.md +++ b/docs/ci.md @@ -1,12 +1,13 @@ ## Continuous Integration + A text-based continuous integration (CI) sever is set up at [pash.ndr.md](http://pash.ndr.md). 
-The focus of CI is to monitor correctness, not performance: +The focus of CI is to monitor correctness, not performance: it runs all of PaSh's benchmarks, with small inputs, and compares results with the sequential execution. It additionally runs and reports on the tests from the Smoosh suite. -#### Summary +#### Summary To get the summary of the latest 5 builds: diff --git a/docs/contributing/contrib.md b/docs/contributing.md similarity index 97% rename from docs/contributing/contrib.md rename to docs/contributing.md index 9b7b60582..889cae19c 100644 --- a/docs/contributing/contrib.md +++ b/docs/contributing.md @@ -124,7 +124,7 @@ Restart-Computer Run the `wsl` command (or find the installed Linux distribution in Windows Start menu and run it). After a few minutes of installation, enter a username and password for the internal WSL account to be created. -Continue the PaSh installation process from [here](https://github.com/binpash/pash/blob/main/docs/tutorial/tutorial.md#installation) inside the WSL installation. +Continue the PaSh installation process from [here](tutorial.md#installation) inside the WSL installation. ## Docker TODO @@ -137,7 +137,7 @@ Here are commands related to launching a screen session: * `screen` -> start a new session * `screen -ls` -> show all screen sessions in this machine * `screen -x ` -> attach to screen with id ``, as shown by `-ls` above. -* `screen -x /` -> attach to session `pash` of user `user`, assuming it exists/running; +* `screen -x /` -> attach to session `pash` of user `user`, assuming it exists/running; When in a `screen` session, all `screen`-related commands are prefixed by `ctr-a` (which means pressing `ctrl` and `a` _together_, and _then_ pressing the followup character). 
Here are the 5 most useful commands: * `ctrl-a c` -> create­a new window in the current session @@ -168,7 +168,7 @@ git merge master # fetch changes from main/master (You can use `rebase` instead of `merge` if your branch is local and hasn't been pushed to GitHub, but `merge` if your branch is already pushed.) -## Process for Using EC2 Instance +## Process for Using EC2 Instance A reason to use Amazon Elastic Compute Cloud (EC2) is having insufficient computing power in your local machine. The steps to do are as follows. The generated key is of the form user@hostname. diff --git a/docs/install/README.md b/docs/install.md similarity index 90% rename from docs/install/README.md rename to docs/install.md index d6aa42d27..3ccba3f7e 100644 --- a/docs/install/README.md +++ b/docs/install.md @@ -49,7 +49,7 @@ We refresh this image (as well as other images) on every major release. [//]: # "TODO(@nvasilakis, @dkarnikis): Need to automate this per release." _Build Image (Latest Commit):_ -To build the latest Docker container, run `docker build` in [scripts/docker](https://github.com/binpash/pash/tree/main/scripts/docker): +To build the latest Docker container, run `docker build` in [scripts/docker](../scripts/docker): ```sh git clone git@github.com:binpash/pash.git cd pash/scripts/docker/ @@ -64,11 +64,11 @@ docker run --name pash-play -it pash:latest ``` PaSh can be found in the container's `/opt/pash` directory, so run `cd pash; git pull` to fetch the latest updates. -More information in the [pash-on-docker guide](../contributing/contrib.md#pash-on-docker-a-pocket-guide). +More information in the [pash-on-docker guide](contributing.md#pash-on-docker-a-pocket-guide). ### Windows using WSL To run PaSh on windows without Docker, install [WSL](https://docs.microsoft.com/en-us/windows/wsl/install-win10). -A short tutorial is included in the [contributing](../contributing/contrib.md) guide. +A short tutorial is included in the [contributing](contributing.md) guide. 
[//]: # "TODO(@nvasilakis, @dkarnikis): Need to add instructions for OS X." diff --git a/docs/contributing/releases.md b/docs/releases.md similarity index 100% rename from docs/contributing/releases.md rename to docs/releases.md diff --git a/docs/tutorial/tutorial.md b/docs/tutorial.md similarity index 98% rename from docs/tutorial/tutorial.md rename to docs/tutorial.md index 153ba8d36..0b3bce73d 100644 --- a/docs/tutorial/tutorial.md +++ b/docs/tutorial.md @@ -2,7 +2,7 @@ Quick jump: [Introduction](#introduction) | [Running Scripts](#running-scripts) | [What Next?](#what-next) This short tutorial covers the `pash`'s main functionality. -Before proceeding, make sure [you have installed PaSh](../install/) +Before proceeding, make sure [you have installed PaSh](install.md) ## Introduction @@ -17,11 +17,11 @@ Consider the following spell-checking script, applied to two large markdown file ```sh # spell-checking.sh -cat f1.md f2.md | +cat f1.md f2.md | tr A-Z a-z | tr -cs A-Za-z '\n' | sort | - uniq | + uniq | comm -13 dict.txt - > out cat out | wc -l | sed 's/$/ mispelled words!/' ``` @@ -98,7 +98,7 @@ On our evaluation infrastructure, the script takes about 41s. To execute it using `pash` with 2x-parallelism: ```sh time $PASH_TOP/pa.sh -w 2 -d 1 --log_file pash.log demo-spell.sh > pash-spell.out -``` +``` On our evaluation infrastructure, the 2x-parallel script takes about 28s. You can check that the results are correct by: @@ -109,7 +109,7 @@ diff spell.out pash-spell.out Assuming you have more than 8 CPUs, you could also execute it with 8x-parallelism using: ```sh time $PASH_TOP/pa.sh -w 8 -d 1 --log_file pash.log demo-spell.sh > pash-spell.out -``` +``` On our evaluation infrastructure, the 8x-parallel script takes about 14s. To view the parallel code emitted by the compiler, you can inspect the log: @@ -163,12 +163,12 @@ This section includes pointers for further exploration, depending on your needs. 
#### The PaSh Repo -PaSh consist of three main components and a few additional "auxiliary" files and directories. +PaSh consists of three main components and a few additional "auxiliary" files and directories. The three main components are: -* [annotations](../../annotations/): DSL characterizing commands, parallelizability study, and associated annotations. More specifically, (i) a lightweight annotation language allows command developers to express key parallelizability properties about their commands; (ii) an accompanying parallelizability study of POSIX and GNU commands. guides the annotation language and optimized aggregator library +* [annotations](../annotations/): DSL characterizing commands, parallelizability study, and associated annotations. More specifically, (i) a lightweight annotation language allows command developers to express key parallelizability properties about their commands; (ii) an accompanying parallelizability study of POSIX and GNU commands guides the annotation language and optimized aggregator library. -* [compiler](../../compiler): Shell-dataflow translations and associated parallelization transformations. Given a script, the PaSh compiler converts it to a dataflow graph, performs a series of semantics-preserving program transformations that expose parallelism, and then converts the dataflow graph back into a POSIX script. +* [compiler](../compiler): Shell-dataflow translations and associated parallelization transformations. Given a script, the PaSh compiler converts it to a dataflow graph, performs a series of semantics-preserving program transformations that expose parallelism, and then converts the dataflow graph back into a POSIX script. -* [runtime](../../runtime): Runtime components such as `eager`, `split`, and associated combiners. Apart from POSIX constructs added to guide parallelism explicitly, PaSh provides Unix-aware runtime primitives for addressing performance- and correctness-related issues. +* [runtime](../runtime): Runtime components such as `eager`, `split`, and associated combiners. Apart from POSIX constructs added to guide parallelism explicitly, PaSh provides Unix-aware runtime primitives for addressing performance- and correctness-related issues. 
@@ -190,7 +190,7 @@ Chat: * [Discord Server](https://discord.com/channels/947328962739187753/) ([Invite](http://join.binpa.sh/)) -Mailing Lists: +Mailing Lists: * [Discussion](https://groups.google.com/g/pash-dev): Join this mailing list for discussing all things `pash` * [Commits](https://groups.google.com/g/pash-commits): Join this mailing list for commit notifications