Skip to content

Commit

Permalink
Update command line arguments and simplify clp-s.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
gibber9809 committed Dec 16, 2024
1 parent df0144a commit cc75ffe
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 87 deletions.
145 changes: 119 additions & 26 deletions components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#include "CommandLineArguments.hpp"

#include <filesystem>
#include <iostream>

#include <boost/program_options.hpp>
#include <fmt/core.h>
#include <spdlog/spdlog.h>

#include "../clp/cli_utils.hpp"
Expand Down Expand Up @@ -131,16 +133,19 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
throw std::invalid_argument(std::string("Unknown action '") + command_input + "'");
}

constexpr std::string_view cNoAuth{"none"};
constexpr std::string_view cS3Auth{"s3"};
if (Command::Compress == m_command) {
po::options_description compression_positional_options;
std::vector<std::string> input_paths;
// clang-format off
compression_positional_options.add_options()(
"archives-dir",
po::value<std::string>(&m_archives_dir)->value_name("DIR"),
"output directory"
)(
"input-paths",
po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
po::value<std::vector<std::string>>(&input_paths)->value_name("PATHS"),
"input paths"
);
// clang-format on
Expand All @@ -151,6 +156,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
constexpr std::string_view cJsonFileType{"json"};
constexpr std::string_view cKeyValueIrFileType{"kv-ir"};
std::string file_type{cJsonFileType};
std::string auth{cNoAuth};
// clang-format off
compression_options.add_options()(
"compression-level",
Expand Down Expand Up @@ -209,6 +215,14 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"file-type",
po::value<std::string>(&file_type)->value_name("FILE_TYPE")->default_value(file_type),
"The type of file being compressed (json or kv-ir)"
)(
"auth",
po::value<std::string>(&auth)
->value_name("AUTH_TYPE")
->default_value(auth),
"Type of authentication required for network requests (s3 | none). Authentication"
" with s3 requires the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment"
" variables."
);
// clang-format on

Expand Down Expand Up @@ -252,13 +266,19 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
}

if (false == input_path_list_file_path.empty()) {
if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) {
if (false == read_paths_from_file(input_path_list_file_path, input_paths)) {
SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
return ParsingResult::Failure;
}
}

if (m_file_paths.empty()) {
for (auto const& path : input_paths) {
if (false == get_input_files_for_raw_path(path, m_input_paths)) {
throw std::invalid_argument(fmt::format("Invalid input path \"{}\".", path));
}
}

if (m_input_paths.empty()) {
throw std::invalid_argument("No input paths specified.");
}

Expand All @@ -278,6 +298,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
throw std::invalid_argument("Unknown FILE_TYPE: " + file_type);
}

if (cS3Auth == auth) {
m_network_auth.method = AuthMethod::S3PresignedUrlV4;
} else if (cNoAuth != auth) {
throw std::invalid_argument(fmt::format("Invalid authentication type \"{}\"", auth)
);
}

// Parse and validate global metadata DB config
if (false == metadata_db_config_file_path.empty()) {
clp::GlobalMetadataDBConfig metadata_db_config;
Expand All @@ -302,27 +329,22 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
}
} else if ((char)Command::Extract == command_input) {
po::options_description extraction_options;
std::string archive_path;
// clang-format off
extraction_options.add_options()(
"archives-dir",
po::value<std::string>(&m_archives_dir),
"The directory containing the archives"
"archive-path",
po::value<std::string>(&archive_path),
"Path to a directory containing archives, or the path to a single archive"
)(
"output-dir",
po::value<std::string>(&m_output_dir),
"The output directory for the decompressed file"
);
// clang-format on

po::options_description input_options("Input Options");
input_options.add_options()(
"archive-id",
po::value<std::string>(&m_archive_id)->value_name("ID"),
"ID of the archive to decompress"
);
extraction_options.add(input_options);

po::options_description decompression_options("Decompression Options");
std::string auth{cNoAuth};
std::string archive_id;
// clang-format off
decompression_options.add_options()(
"ordered",
Expand All @@ -335,6 +357,19 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
->value_name("SIZE"),
"Chunk size (B) for each output file when decompressing records in log order."
" When set to 0, no chunking is performed."
)(
"archive-id",
po::value<std::string>(&archive_id)->value_name("ID"),
"Limit decompression to the archive with the given ID in a subdirectory of"
" archive-path"
)(
"auth",
po::value<std::string>(&auth)
->value_name("AUTH_TYPE")
->default_value(auth),
"Type of authentication required for network requests (s3 | none). Authentication"
" with s3 requires the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment"
" variables."
);
// clang-format on
extraction_options.add(decompression_options);
Expand All @@ -354,7 +389,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
extraction_options.add(output_metadata_options);

po::positional_options_description positional_options;
positional_options.add("archives-dir", 1);
positional_options.add("archive-path", 1);
positional_options.add("output-dir", 1);

std::vector<std::string> unrecognized_options
Expand Down Expand Up @@ -382,15 +417,38 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {

po::options_description visible_options;
visible_options.add(general_options);
visible_options.add(input_options);
visible_options.add(decompression_options);
visible_options.add(output_metadata_options);
std::cerr << visible_options << std::endl;
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No archives directory specified");
if (archive_path.empty()) {
throw std::invalid_argument("No archive path specified");
}

if (false == archive_id.empty()) {
auto archive_fs_path = std::filesystem::path(archive_path) / archive_id;
if (false == std::filesystem::exists(archive_fs_path)) {
throw std::invalid_argument("Requested archive does not exist");
}
m_input_paths.emplace_back(clp_s::Path{
.source{clp_s::InputSource::Filesystem},
.path{archive_fs_path.string()}
});
} else if (false == get_input_archives_for_raw_path(archive_path, m_input_paths)) {
throw std::invalid_argument("Invalid archive path");
}

if (m_input_paths.empty()) {
throw std::invalid_argument("No archive paths specified");
}

if (cS3Auth == auth) {
m_network_auth.method = AuthMethod::S3PresignedUrlV4;
} else if (cNoAuth != auth) {
throw std::invalid_argument(fmt::format("Invalid authentication type \"{}\"", auth)
);
}

if (m_output_dir.empty()) {
Expand Down Expand Up @@ -422,11 +480,12 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {

po::options_description search_options;
std::string output_handler_name;
std::string archive_path;
// clang-format off
search_options.add_options()(
"archives-dir",
po::value<std::string>(&m_archives_dir),
"The directory containing the archives"
"archive-path",
po::value<std::string>(&archive_path),
"Path to a directory containing archives, or the path to a single archive"
)(
"query,q",
po::value<std::string>(&m_query),
Expand All @@ -440,12 +499,14 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
);
// clang-format on
po::positional_options_description positional_options;
positional_options.add("archives-dir", 1);
positional_options.add("archive-path", 1);
positional_options.add("query", 1);
positional_options.add("output-handler", 1);
positional_options.add("output-handler-args", -1);

po::options_description match_options("Match Controls");
std::string auth{cNoAuth};
std::string archive_id;
// clang-format off
match_options.add_options()(
"tge",
Expand All @@ -461,8 +522,8 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"Ignore case distinctions between values in the query and the compressed data"
)(
"archive-id",
po::value<std::string>(&m_archive_id)->value_name("ID"),
"Limit search to the archive with the given ID"
po::value<std::string>(&archive_id)->value_name("ID"),
"Limit search to the archive with the given ID in a subdirectory of archive-path"
)(
"projection",
po::value<std::vector<std::string>>(&m_projection_columns)
Expand All @@ -471,6 +532,14 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"Project only the given set of columns for matching results. This option must be"
" specified after all positional options. Values that are objects or structured"
" arrays are currently unsupported."
)(
"auth",
po::value<std::string>(&auth)
->value_name("AUTH_TYPE")
->default_value(auth),
"Type of authentication required for network requests (s3 | none). Authentication"
" with s3 requires the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment"
" variables."
);
// clang-format on
search_options.add(match_options);
Expand Down Expand Up @@ -622,8 +691,32 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No archives directory specified");
if (archive_path.empty()) {
throw std::invalid_argument("No archive path specified");
}

if (false == archive_id.empty()) {
auto archive_fs_path = std::filesystem::path(archive_path) / archive_id;
if (false == std::filesystem::exists(archive_fs_path)) {
throw std::invalid_argument("Requested archive does not exist");
}
m_input_paths.emplace_back(clp_s::Path{
.source{clp_s::InputSource::Filesystem},
.path{archive_fs_path.string()}
});
} else if (false == get_input_archives_for_raw_path(archive_path, m_input_paths)) {
throw std::invalid_argument("Invalid archive path");
}

if (m_input_paths.empty()) {
throw std::invalid_argument("No archive paths specified");
}

if (cS3Auth == auth) {
m_network_auth.method = AuthMethod::S3PresignedUrlV4;
} else if (cNoAuth != auth) {
throw std::invalid_argument(fmt::format("Invalid authentication type \"{}\"", auth)
);
}

if (m_query.empty()) {
Expand Down
13 changes: 6 additions & 7 deletions components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "../clp/GlobalMetadataDBConfig.hpp"
#include "../reducer/types.hpp"
#include "Defs.hpp"
#include "InputConfig.hpp"

namespace clp_s {
class CommandLineArguments {
Expand Down Expand Up @@ -51,7 +52,9 @@ class CommandLineArguments {

/**
 * @return The command stored in m_command, which parse_arguments branches on (e.g.
 * Command::Compress) to decide which option set to parse.
 */
Command get_command() const { return m_command; }

std::vector<std::string> const& get_file_paths() const { return m_file_paths; }
/**
 * @return The resolved input paths collected by parse_arguments. For compression these are
 * filled by get_input_files_for_raw_path from the raw input paths; for extraction and search
 * they are either the single `archive-path / archive-id` filesystem path or the result of
 * get_input_archives_for_raw_path on `archive-path`.
 */
std::vector<Path> const& get_input_paths() const { return m_input_paths; }

/**
 * @return The network authentication option. parse_arguments sets its method to
 * AuthMethod::S3PresignedUrlV4 when `--auth s3` is given; with `--auth none` (the default) the
 * value-initialized NetworkAuthOption is returned unchanged.
 */
NetworkAuthOption const& get_network_auth() const { return m_network_auth; }

/**
 * @return The value of the compression command's `archives-dir` option (described there as the
 * output directory).
 * NOTE(review): after this change the extract and search commands bind `archive-path` to a local
 * variable instead, so this looks to be populated only for compression -- confirm.
 */
std::string const& get_archives_dir() const { return m_archives_dir; }

Expand Down Expand Up @@ -87,8 +90,6 @@ class CommandLineArguments {

/**
 * @return The value of m_ignore_case, which defaults to false.
 * NOTE(review): presumably set by the search option that ignores case distinctions between query
 * values and compressed data -- the binding is not visible here, confirm.
 */
bool get_ignore_case() const { return m_ignore_case; }

std::string const& get_archive_id() const { return m_archive_id; }

/**
 * @return The optional global metadata DB config held in m_metadata_db_config.
 * NOTE(review): presumably empty unless a metadata DB config file path was supplied and parsed
 * during compression argument parsing -- confirm against parse_arguments.
 */
std::optional<clp::GlobalMetadataDBConfig> const& get_metadata_db_config() const {
return m_metadata_db_config;
}
Expand Down Expand Up @@ -177,7 +178,8 @@ class CommandLineArguments {
Command m_command;

// Compression and decompression variables
std::vector<std::string> m_file_paths;
std::vector<Path> m_input_paths;
NetworkAuthOption m_network_auth{};
std::string m_archives_dir;
std::string m_output_dir;
std::string m_timestamp_key;
Expand Down Expand Up @@ -213,9 +215,6 @@ class CommandLineArguments {
bool m_ignore_case{false};
std::vector<std::string> m_projection_columns;

// Decompression and search variables
std::string m_archive_id;

// Search aggregation variables
std::string m_reducer_host;
int m_reducer_port{-1};
Expand Down
Loading

0 comments on commit cc75ffe

Please sign in to comment.