diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..177567f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,60 @@ +name: GitHub CI/CD Workflow + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + container: + image: ubuntu:24.04 + env: + OPENSSL_DIR: /usr/include/openssl + OPENSSL_LIB_DIR: /usr/lib/x86_64-linux-gnu + OPENSSL_INCLUDE_DIR: /usr/include/openssl + + steps: + - name: Install dependencies + run: | + apt-get update + apt-get install -y curl git build-essential libssl-dev + + - uses: actions/checkout@v4 # checkout the repository + - name: Install Rust + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Set up JDK 11 # required for build-models.sh + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '11' + + - name: Install Go + uses: actions/setup-go@v4 + with: + go-version: '^1.22' + - name: Install Funnel + run: | + if [ -d "funnel" ]; then rm -Rf funnel; fi + git clone https://github.com/ohsu-comp-bio/funnel.git + cd funnel && make && make install && cd ..
+ - uses: actions/checkout@v4 # checkout the repository + - name: Build models + run: | + bash ./build-models.sh + - name: Build + run: | + cargo build --verbose + - name: Run tests + run: | + bash ./run-tests.sh + - name: Lint + run: | + cargo clippy -- -D warnings + - name: Format + run: | + cargo fmt -- ./lib/src/serviceinfo/models/*.rs # workaround to fix autogenerated code formatting + cargo fmt -- ./lib/src/tes/models/*.rs + cargo fmt -- --check \ No newline at end of file diff --git a/.github/workflows/local.yml b/.github/workflows/local.yml new file mode 100644 index 0000000..d0b039f --- /dev/null +++ b/.github/workflows/local.yml @@ -0,0 +1,98 @@ +name: Local CI/CD Workflow + +on: workflow_dispatch + +jobs: + build: + + runs-on: ubuntu-latest + container: + image: ubuntu:24.04 + env: + OPENSSL_DIR: /usr/include/openssl + OPENSSL_LIB_DIR: /usr/lib/x86_64-linux-gnu + OPENSSL_INCLUDE_DIR: /usr/include/openssl + + steps: + + - name: Cache Rust dependencies + uses: actions/cache@v3 + with: + path: ~/.cargo + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: ${{ runner.os }}-cargo- + + - name: Cache Rust build output + uses: actions/cache@v3 + with: + path: target + key: ${{ runner.os }}-build-${{ hashFiles('**/Cargo.lock') }} + restore-keys: ${{ runner.os }}-build- + + - name: Cache Maven dependencies + uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven- + + - name: Cache Go modules + uses: actions/cache@v3 + with: + path: ~/.cache/go-build + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: ${{ runner.os }}-go- + + - name: Cache Funnel dependencies + uses: actions/cache@v3 + with: + path: ~/funnel/build + key: ${{ runner.os }}-funnel-${{ hashFiles('**/funnel/*') }} + restore-keys: ${{ runner.os }}-funnel- + + - uses: actions/checkout@v4 # checkout the repository + - name: Install Rust + run: | + curl --proto 
'=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Set up JDK 11 # required for build-models.sh + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '11' + + - name: Install Go + uses: actions/setup-go@v4 + with: + go-version: '^1.22' + - name: Install Funnel + run: | + if [ -d "funnel" ]; then rm -Rf funnel; fi + git clone https://github.com/ohsu-comp-bio/funnel.git + cd funnel && make && make install && cd .. + - uses: actions/checkout@v4 # checkout the repository + - name: Build models + run: | + . $HOME/.cargo/env + bash ./build-models.sh + - name: Build + run: | + . $HOME/.cargo/env + cargo build --verbose + - name: Run tests + run: | + . $HOME/.cargo/env + bash ./run-tests.sh + - name: Lint + run: | + . $HOME/.cargo/env + cargo clippy -- -D warnings + - name: Format + run: | + . $HOME/.cargo/env + # rustup install nightly – fails for some reason + # rustup default nightly + cargo fmt -- ./lib/src/serviceinfo/models/*.rs # workaround to fix autogenerated code formatting + cargo fmt -- ./lib/src/tes/models/*.rs + cargo fmt -- --check # --config-path ./rustfmt.toml \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..61675d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +target/ +debug/ +Cargo.lock +openapitools.json +*.log +funnel-work-dir/ +funnel/ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a45a957 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[workspace] +members = [ + "lib" +] + +[workspace.package] +version = "0.1.0" +authors = ["Aarav Mehta (Google Summer of Code Participant)", "Pavel Nikonorov (Google Summer of Code Mentor; GENXT)", "ELIXIR Cloud & AAI (ELIXIR Europe)"] +edition = "2021" +readme = "./README.md" +license-file = "LICENSE.txt" +repository = "https://github.com/elixir-cloud-aai/ga4gh-sdk.git" diff --git a/README.md b/README.md new file mode 100644 index 0000000..ea044b1 ---
/dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# A Generic SDK and CLI for GA4GH API services + +## Building + +First, clone the repository, and then run the following command to automatically generate models using OpenAPI specifications: +``` +bash ./build-models.sh +``` + +To build the project: +``` +cargo build +``` + +## Running the tests + +Before running the tests, you need to install Funnel, a task execution system that is compatible with the GA4GH TES API. Follow the instructions in the [Funnel Developer's Guide](https://ohsu-comp-bio.github.io/funnel/docs/development/developers/) to install Funnel. + +Once you have installed Funnel, you can run the tests. This will automatically run Funnel as well: + +``` +bash ./run-tests.sh +``` +or, you can run using: +``` +cargo nextest run +``` +For checking the unit coverage, you can run: +``` +cargo llvm-cov nextest +``` + +To test the CI/CD workflow locally, install `act` and run the following command: +``` +act -j build --container-architecture linux/amd64 -P ubuntu-latest=ubuntu:24.04 --reuse +``` \ No newline at end of file diff --git a/build-models.sh b/build-models.sh new file mode 100755 index 0000000..58b9dbe --- /dev/null +++ b/build-models.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# Exit immediately if a command exits with a non-zero status. +set -e + +set -u + +# Ensure the OpenAPI Generator JAR file is set up +mkdir -p ~/bin/openapitools +OPENAPI_GENERATOR_JAR=~/bin/openapitools/openapi-generator-cli.jar +if [ !
-f "$OPENAPI_GENERATOR_JAR" ]; then + curl -L https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/7.7.0/openapi-generator-cli-7.7.0.jar -o "$OPENAPI_GENERATOR_JAR" +fi + +get_git_repo_name() { + # Extract the URL of the remote "origin" + url=$(git config --get remote.origin.url) + + # Extract the repository name from the URL + repo_name=$(basename -s .git "$url") + + echo "$repo_name" +} + +SCRIPT_DIR="$(pwd)" + +generate_openapi_models() { + # Parameters + OPENAPI_SPEC_PATH="$1" + API_NAME="$2" + DESTINATION_DIR="$3" + + # Define the temporary output directory for the OpenAPI generator + TEMP_OUTPUT_DIR=$(mktemp -d) + + # Remove the temporary directory at the end of the script + trap 'rm -rf "$TEMP_OUTPUT_DIR"' EXIT + + # Run the OpenAPI generator CLI using the JAR file + java -jar "$OPENAPI_GENERATOR_JAR" generate -g rust \ + -i "$OPENAPI_SPEC_PATH" \ + -o "$TEMP_OUTPUT_DIR" \ + --additional-properties=useSingleRequestParameter=true + + # Check if the generation was successful + if [ $? -ne 0 ]; then + echo "OpenAPI generation failed. Check the verbose output for details." + exit 1 + fi + + # Remove the openapitools.json file + rm -f ./openapitools.json + + echo "TEMP_OUTPUT_DIR is $TEMP_OUTPUT_DIR" + + # Modify the import statements in each generated file + SED_RULE="s/use crate::models;/#![allow(unused_imports)]\n#![allow(clippy::empty_docs)]\nuse crate::$API_NAME::models;/" + for file in $(find "$TEMP_OUTPUT_DIR" -name '*.rs'); do + if [[ "$OSTYPE" == "darwin"* ]]; then + # macOS (BSD) sed syntax + sed -i '' "$SED_RULE" "$file" + else + # Linux (GNU) sed syntax + sed -i "$SED_RULE" "$file" + fi + done + + rm -rf "$DESTINATION_DIR/models" + mkdir -p "$DESTINATION_DIR" + cp -r "$TEMP_OUTPUT_DIR/src/models" "$DESTINATION_DIR" + + echo "OpenAPI generation complete. 
Models copied to $DESTINATION_DIR" +} + +generate_openapi_models \ + "https://raw.githubusercontent.com/ga4gh-discovery/ga4gh-service-info/develop/service-info.yaml" \ + "serviceinfo" "$SCRIPT_DIR/lib/src/serviceinfo/" + +generate_openapi_models \ + "https://raw.githubusercontent.com/ga4gh/task-execution-schemas/develop/openapi/task_execution_service.openapi.yaml" \ + "tes" "$SCRIPT_DIR/lib/src/tes/" \ No newline at end of file diff --git a/lib/Cargo.toml b/lib/Cargo.toml new file mode 100644 index 0000000..8266b6d --- /dev/null +++ b/lib/Cargo.toml @@ -0,0 +1,38 @@ + +[package] +name = "ga4gh-lib" +description = """ +Generic SDK and CLI for GA4GH API services +""" +repository = "https://github.com/elixir-cloud-aai/ga4gh-sdk/tree/main/lib" +version.workspace = true +authors.workspace = true +edition.workspace = true +readme.workspace = true +license-file.workspace = true +# rust-version = "1.74" + +[dependencies] +tokio = { version = "1", features = ["full"] } +serde = "^1.0" +serde_derive = "^1.0" +serde_json = "^1.0" +url = "^2.2" +uuid = { version = "^1.0", features = ["serde", "v4"] } +log = "0.4" +env_logger = "0.9" +once_cell = "1.8.0" + +[dependencies.reqwest] +version = "^0.11" +features = ["json", "multipart"] + +[dev-dependencies] +mockito = "0.31" +mockall = "0.10.2" +cargo-nextest = "0.9.30" + +[lib] +name = "ga4gh_sdk" +path = "src/lib.rs" + diff --git a/lib/src/configuration.rs b/lib/src/configuration.rs new file mode 100644 index 0000000..b6b9688 --- /dev/null +++ b/lib/src/configuration.rs @@ -0,0 +1,179 @@ +use url::Url; +/// A struct representing a configuration for the SDK. +/// +/// The `Configuration` struct is responsible for specifying details of the Endpoint where the requests are made. +/// It provides methods for making constructing new configuration, changing the base url, and specifying a default configuration. +#[derive(Debug, Clone)] +pub struct Configuration { + /// The base path for API requests. 
+ pub base_path: Url, + /// The user agent to be used in API requests. + pub user_agent: Option, + /// The basic authentication credentials. + pub basic_auth: Option, + /// The OAuth access token for authentication. + pub oauth_access_token: Option, + /// The bearer access token for authentication. + pub bearer_access_token: Option, + /// The API key for authentication. + pub api_key: Option, +} + +/// Represents the basic authentication credentials. +#[derive(Debug, Clone, PartialEq)] +pub struct BasicAuth { + /// The username for basic authentication. + pub username: String, + /// The password for basic authentication. + pub password: Option, +} + +/// Represents the API key for authentication. +#[derive(Debug, Clone, PartialEq)] +pub struct ApiKey { + /// The prefix for the API key. + pub prefix: Option, + /// The key value of the API key. + pub key: String, +} + +impl Configuration { + /// Creates a new instance of Configuration. + /// + /// # Arguments + /// + /// * `base_path` - The base path for API requests. + /// * `user_agent` - The user agent to be used in API requests. + /// * `basic_auth` - The basic authentication credentials. + /// * `oauth_access_token` - The OAuth access token for authentication. + /// + /// # Returns + /// + /// A new instance of Configuration. + pub fn new( + base_path: Url, + ) -> Self { + Configuration { + base_path, + user_agent:None, + basic_auth: None, + oauth_access_token: None, + bearer_access_token: None, + api_key: None, + } + } + + /// Sets the base path for API requests. + /// + /// # Arguments + /// + /// * `base_path` - The base path for API requests. + /// + /// # Returns + /// + /// A mutable reference to the Configuration instance. + pub fn set_base_path(&mut self, base_path: Url) -> &mut Self { + self.base_path = base_path; + self + } + + /// Sets the user agent for API requests. + /// + /// # Arguments + /// + /// * `user_agent` - The user agent to be used in API requests. 
+ /// + /// # Returns + /// + /// A new instance of Configuration. + pub fn with_user_agent(mut self, user_agent: String) -> Self { + self.user_agent = Some(user_agent); + self + } + + /// Sets the basic authentication credentials for API requests. + /// + /// # Arguments + /// + /// * `basic_auth` - The basic authentication credentials. + /// + /// # Returns + /// + /// A new instance of Configuration. + pub fn with_basic_auth(mut self, basic_auth: BasicAuth) -> Self { + self.basic_auth = Some(basic_auth); + self + } + + /// Sets the OAuth access token for API requests. + /// + /// # Arguments + /// + /// * `oauth_access_token` - The OAuth access token for authentication. + /// + /// # Returns + /// + /// A new instance of Configuration. + pub fn with_oauth_access_token(mut self, oauth_access_token: String) -> Self { + self.oauth_access_token = Some(oauth_access_token); + self + } +} + +impl Default for Configuration { + /// Creates a default instance of Configuration. + /// + /// # Returns + /// + /// A default instance of Configuration. 
+ /// This is used to define a configuration for a server that is running on your localhost + fn default() -> Self { + Configuration { + base_path: Url::parse("http://localhost").unwrap(), + user_agent: Some("GA4GH SDK".to_owned()), + basic_auth: None, + oauth_access_token: None, + bearer_access_token: None, + api_key: None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use url::Url; + + #[test] + fn test_new_configuration() { + let config = Configuration::new( + Url::parse("https://api.example.com").unwrap(), + ) + .with_user_agent("My User Agent".to_owned()) + .with_basic_auth(BasicAuth { + username: "admin".to_owned(), + password: Some("password".to_owned()), + }) + .with_oauth_access_token("my_oauth_token".to_owned()); + + assert_eq!(config.base_path.as_str(), "https://api.example.com/"); + assert_eq!(config.user_agent, Some("My User Agent".to_owned())); + assert_eq!( + config.basic_auth, + Some(BasicAuth { + username: "admin".to_owned(), + password: Some("password".to_owned()), + }) + ); + assert_eq!(config.oauth_access_token, Some("my_oauth_token".to_owned())); + assert_eq!(config.bearer_access_token, None); + assert_eq!(config.api_key, None); + } + + #[test] + fn test_set_base_path() { + let mut config = Configuration::default(); + config.set_base_path(Url::parse("https://api.example.com").unwrap()); + assert_eq!(config.base_path.as_str(), "https://api.example.com/"); + } +} diff --git a/lib/src/lib.rs b/lib/src/lib.rs new file mode 100644 index 0000000..243bff2 --- /dev/null +++ b/lib/src/lib.rs @@ -0,0 +1,12 @@ +#[allow(unused_imports)] +#[macro_use] +extern crate serde_derive; + +#[cfg(test)] +mod test_utils; + +pub mod configuration; +pub mod transport; +pub mod serviceinfo; +pub mod tes; + diff --git a/lib/src/serviceinfo/mod.rs b/lib/src/serviceinfo/mod.rs new file mode 100644 index 0000000..7f307cc --- /dev/null +++ b/lib/src/serviceinfo/mod.rs @@ -0,0 +1,66 @@ +pub mod models; +use crate::configuration::Configuration; +use 
crate::transport::Transport; + +#[derive(Clone)] +pub struct ServiceInfo { + transport: Transport, +} + +impl ServiceInfo { + pub fn new(config: &Configuration) -> Result> { + let transport = Transport::new(config); + let instance = ServiceInfo { + transport: transport.clone(), + }; + Ok(instance) + } + + pub async fn get(&self) -> Result> { + let response = self.transport.get("/service-info", None).await; + match response { + Ok(response_body) => match serde_json::from_str::(&response_body) { + Ok(service) => Ok(service), + Err(e) => { + log::error!("Failed to deserialize response: {}. Response body: {}", e, response_body); + Err(e.into()) + } + }, + Err(e) => { + log::error!("Error getting response: {}", e); + Err(e) + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::configuration::Configuration; + use crate::serviceinfo::ServiceInfo; + use crate::test_utils::{ensure_funnel_running, setup}; + use tokio; + + #[tokio::test] + async fn test_get_service_info_from_funnel() { + setup(); + let mut config = Configuration::default(); + let funnel_url = ensure_funnel_running().await; + + // Parse the funnel_url String into a Url + let funnel_url = url::Url::parse(&funnel_url).expect("Invalid URL format"); + + config.set_base_path(funnel_url); + let service_info = ServiceInfo::new(&config).unwrap(); + + // Call get_service_info and print the result + match service_info.get().await { + Ok(service) => { + println!("Service Info: {:?}", service); + } + Err(e) => { + log::error!("ServiceInfo error in module 'mod.rs': {}", e); + } + } + } +} diff --git a/lib/src/serviceinfo/models/mod.rs b/lib/src/serviceinfo/models/mod.rs new file mode 100644 index 0000000..9aa6373 --- /dev/null +++ b/lib/src/serviceinfo/models/mod.rs @@ -0,0 +1,6 @@ +pub mod service; +pub use self::service::Service; +pub mod service_organization; +pub use self::service_organization::ServiceOrganization; +pub mod service_type; +pub use self::service_type::ServiceType; diff --git 
a/lib/src/serviceinfo/models/service.rs b/lib/src/serviceinfo/models/service.rs new file mode 100644 index 0000000..ea8816c --- /dev/null +++ b/lib/src/serviceinfo/models/service.rs @@ -0,0 +1,70 @@ +/* + * GA4GH service-info API specification + * + * A way for a service to describe basic metadata concerning a service alongside a set of capabilities and/or limitations of the service. More information on [GitHub](https://github.com/ga4gh-discovery/ga4gh-service-info/). + * + * The version of the OpenAPI document: 1.0.0 + * Contact: ga4gh-discovery-networks@ga4gh.org + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::serviceinfo::models; +use serde::{Deserialize, Serialize}; + +/// Service : GA4GH service +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct Service { + /// Unique ID of this service. Reverse domain name notation is recommended, though not required. The identifier should attempt to be globally unique so it can be used in downstream aggregator services e.g. Service Registry. + #[serde(rename = "id")] + pub id: String, + /// Name of this service. Should be human readable. + #[serde(rename = "name")] + pub name: String, + #[serde(rename = "type")] + pub r#type: Box, + /// Description of the service. Should be human readable and provide information about the service. + #[serde(rename = "description", skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(rename = "organization")] + pub organization: Box, + /// URL of the contact for the provider of this service, e.g. a link to a contact form (RFC 3986 format), or an email (RFC 2368 format). + #[serde(rename = "contactUrl", skip_serializing_if = "Option::is_none")] + pub contact_url: Option, + /// URL of the documentation of this service (RFC 3986 format). This should help someone learn how to use your service, including any specifics required to access data, e.g. authentication. 
+ #[serde(rename = "documentationUrl", skip_serializing_if = "Option::is_none")] + pub documentation_url: Option, + /// Timestamp describing when the service was first deployed and available (RFC 3339 format) + #[serde(rename = "createdAt", skip_serializing_if = "Option::is_none")] + pub created_at: Option, + /// Timestamp describing when the service was last updated (RFC 3339 format) + #[serde(rename = "updatedAt", skip_serializing_if = "Option::is_none")] + pub updated_at: Option, + /// Environment the service is running in. Use this to distinguish between production, development and testing/staging deployments. Suggested values are prod, test, dev, staging. However this is advised and not enforced. + #[serde(rename = "environment", skip_serializing_if = "Option::is_none")] + pub environment: Option, + /// Version of the service being described. Semantic versioning is recommended, but other identifiers, such as dates or commit hashes, are also allowed. The version should be changed whenever the service is updated. + #[serde(rename = "version")] + pub version: String, +} + +impl Service { + /// GA4GH service + pub fn new(id: String, name: String, r#type: models::ServiceType, organization: models::ServiceOrganization, version: String) -> Service { + Service { + id, + name, + r#type: Box::new(r#type), + description: None, + organization: Box::new(organization), + contact_url: None, + documentation_url: None, + created_at: None, + updated_at: None, + environment: None, + version, + } + } +} + diff --git a/lib/src/serviceinfo/models/service_organization.rs b/lib/src/serviceinfo/models/service_organization.rs new file mode 100644 index 0000000..c57a536 --- /dev/null +++ b/lib/src/serviceinfo/models/service_organization.rs @@ -0,0 +1,36 @@ +/* + * GA4GH service-info API specification + * + * A way for a service to describe basic metadata concerning a service alongside a set of capabilities and/or limitations of the service. 
More information on [GitHub](https://github.com/ga4gh-discovery/ga4gh-service-info/). + * + * The version of the OpenAPI document: 1.0.0 + * Contact: ga4gh-discovery-networks@ga4gh.org + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::serviceinfo::models; +use serde::{Deserialize, Serialize}; + +/// ServiceOrganization : Organization providing the service +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct ServiceOrganization { + /// Name of the organization responsible for the service + #[serde(rename = "name")] + pub name: String, + /// URL of the website of the organization (RFC 3986 format) + #[serde(rename = "url")] + pub url: String, +} + +impl ServiceOrganization { + /// Organization providing the service + pub fn new(name: String, url: String) -> ServiceOrganization { + ServiceOrganization { + name, + url, + } + } +} + diff --git a/lib/src/serviceinfo/models/service_type.rs b/lib/src/serviceinfo/models/service_type.rs new file mode 100644 index 0000000..028fe98 --- /dev/null +++ b/lib/src/serviceinfo/models/service_type.rs @@ -0,0 +1,40 @@ +/* + * GA4GH service-info API specification + * + * A way for a service to describe basic metadata concerning a service alongside a set of capabilities and/or limitations of the service. More information on [GitHub](https://github.com/ga4gh-discovery/ga4gh-service-info/). + * + * The version of the OpenAPI document: 1.0.0 + * Contact: ga4gh-discovery-networks@ga4gh.org + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::serviceinfo::models; +use serde::{Deserialize, Serialize}; + +/// ServiceType : Type of a GA4GH service +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct ServiceType { + /// Namespace in reverse domain name format. Use `org.ga4gh` for implementations compliant with official GA4GH specifications. 
For services with custom APIs not standardized by GA4GH, or implementations diverging from official GA4GH specifications, use a different namespace (e.g. your organization's reverse domain name). + #[serde(rename = "group")] + pub group: String, + /// Name of the API or GA4GH specification implemented. Official GA4GH types should be assigned as part of standards approval process. Custom artifacts are supported. + #[serde(rename = "artifact")] + pub artifact: String, + /// Version of the API or specification. GA4GH specifications use semantic versioning. + #[serde(rename = "version")] + pub version: String, +} + +impl ServiceType { + /// Type of a GA4GH service + pub fn new(group: String, artifact: String, version: String) -> ServiceType { + ServiceType { + group, + artifact, + version, + } + } +} + diff --git a/lib/src/tes/mod.rs b/lib/src/tes/mod.rs new file mode 100644 index 0000000..a5b9281 --- /dev/null +++ b/lib/src/tes/mod.rs @@ -0,0 +1,288 @@ +pub mod models; +pub mod model; +use crate::configuration::Configuration; +use crate::serviceinfo::models::Service; +use crate::serviceinfo::ServiceInfo; +use crate::tes::models::TesListTasksResponse; +use crate::tes::models::TesState; +use crate::tes::models::TesTask; +use crate::transport::Transport; +use crate::tes::model::ListTasksParams; +use serde_json; +use serde_json::from_str; +use serde_json::json; +use serde::Serialize; +use serde_json::Value; + +fn serialize_to_json(item: T) -> Value { + serde_json::to_value(&item).unwrap() +} + +pub fn urlencode>(s: T) -> String { + ::url::form_urlencoded::byte_serialize(s.as_ref().as_bytes()).collect() +} + +#[derive(Debug)] +pub struct Task { + id: String, + transport: Transport, +} + +impl Task { + pub fn new(id: String, transport: Transport) -> Self { + Task { id, transport } + } + + pub async fn status(&self) -> Result> { + let task_id = &self.id; + let view = "FULL"; + let url = format!("/tasks/{}?view={}", task_id, view); + // let params = [("view", view)]; + // let 
params_value = serde_json::json!(params); + // let response = self.transport.get(&url, Some(params_value)).await; + let response = self.transport.get(&url, None).await; + match response { + Ok(resp_str) => { + let task: TesTask = from_str(&resp_str)?; + Ok(task.state.unwrap()) + } + Err(e) => { + let err_msg = format!("HTTP request failed: {}", e); + eprintln!("{}", err_msg); + Err(Box::new(std::io::Error::new(std::io::ErrorKind::Other, err_msg))) + } + } + } + + pub async fn cancel(&self) -> Result> { + let id = &self.id; + let id = &urlencode(id); + let url = format!("/tasks/{}:cancel", id); + let response = self.transport.post(&url, None).await; + match response { + Ok(resp_str) => { + let parsed_json = serde_json::from_str::(&resp_str); + match parsed_json { + Ok(json) => Ok(json), + Err(e) => Err(format!("Failed to parse JSON: {}", e).into()), + } + } + Err(e) => Err(format!("HTTP request failed: {}", e).into()), + } + } +} +#[derive(Debug)] +pub struct TES { + #[allow(dead_code)] + config: Configuration, // not used yet + service: Result>, + transport: Transport, +} + +impl TES { + pub async fn new(config: &Configuration) -> Result> { + let transport = Transport::new(config); + let service_info = ServiceInfo::new(config)?; + + let resp = service_info.get().await; + + let instance = TES { + config: config.clone(), + transport, + service: resp, + }; + + instance.check()?; // Propagate the error if check() fails + Ok(instance) + } + + fn check(&self) -> Result<(), String> { + let resp = &self.service; + match resp.as_ref() { + Ok(service) if service.r#type.artifact == "tes" => Ok(()), + Ok(_) => Err("The endpoint is not an instance of TES".into()), + Err(_) => Err("Error accessing the service".into()), + } + } + + pub async fn create( + &self, + task: TesTask, /*, params: models::TesTask*/ + ) -> Result> { + // First, check if the service is of TES class + self.check().map_err(|e| { + log::error!("Service check failed: {}", e); + e + })?; + // todo: version in 
url based on serviceinfo or user config + let response = self + .transport + .post("/ga4gh/tes/v1/tasks", Some(json!(task))) + .await; + match response { + Ok(response_body) => { + let v: serde_json::Value = serde_json::from_str(&response_body)?; + + // Access the `id` field + let task_id = v + .get("id") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .trim_matches('"') + .to_string(); + + let task = Task { + id: task_id, + transport: self.transport.clone(), + }; + Ok(task) + } + Err(e) => Err(Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to post task: {}", e), + ))), + } + } + + pub async fn get(&self, view: &str, id: &str) -> Result> { + let task_id = id; + let url = format!("/tasks/{}?view={}", task_id, view); + // let params = [("view", view)]; + // let params_value = serde_json::json!(params); + // let response = self.transport.get(&url, Some(params_value)).await; + let response = self.transport.get(&url, None).await; + match response { + Ok(resp_str) => { + let task: TesTask = from_str(&resp_str)?; + Ok(task) + } + Err(e) => Err(e), + } + } + pub async fn list_tasks( + &self, + params: Option, + ) -> Result> { + let params_value = params.map(serialize_to_json); + + // println!("{:?}",params_value); + // Make the request with or without parameters based on the presence of params + let response = if let Some(params_value) = params_value { + self.transport.get("/tasks", Some(params_value)).await + } else { + self.transport.get("/tasks", None).await + }; + + match response { + Ok(resp_str) => { + let task: TesListTasksResponse = from_str(&resp_str)?; + Ok(task) + } + Err(e) => { + eprintln!("HTTP request failed: {:?}", e); + Err(Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + format!("HTTP request failed: {:?}", e), + ))) + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::configuration::Configuration; + use crate::tes::models::TesTask; + use crate::tes::ListTasksParams; + use crate::tes::Task; + use 
crate::tes::TesState; + use crate::tes::TES; + use crate::test_utils::{ensure_funnel_running, setup}; + // use crate::tes::models::TesCreateTaskResponse; + + async fn create_task() -> Result<(Task, TES), Box> { + // setup(); – should be run once in the test function + let mut config = Configuration::default(); + let funnel_url = ensure_funnel_running().await; + + // Parse the funnel_url String into a Url + let funnel_url = url::Url::parse(&funnel_url).expect("Invalid URL format"); + + config.set_base_path(funnel_url); + let tes = match TES::new(&config).await { + Ok(tes) => tes, + Err(e) => { + println!("Error creating TES instance: {:?}", e); + return Err(e); + } + }; + let file_path = "./tests/sample.tes".to_string(); + let task_json = std::fs::read_to_string(file_path).expect("Unable to read file"); + let task: TesTask = serde_json::from_str(&task_json).expect("JSON was not well-formatted"); + + let task = tes.create(task).await?; + Ok((task, tes)) + } + + #[tokio::test] + async fn test_task_create() { + setup(); + let (task, _tes) = create_task().await.expect("Failed to create task"); + assert!(!task.id.is_empty(), "Task ID should not be empty"); // double check if it's a correct assertion + } + + #[tokio::test] + async fn test_task_status() { + setup(); + + let (task, _tes) = create_task().await.expect("Failed to create task"); + assert!(!task.id.is_empty(), "Task ID should not be empty"); + + let status = task.status().await; + match status { + Ok(state) => { + assert!( + matches!(state, TesState::Initializing | TesState::Queued | TesState::Running), + "Unexpected state: {:?}", + state + ); + } + Err(err) => { + panic!("Task status returned an error: {:?}", err); + } + } + } + + #[tokio::test] + async fn test_cancel_task() { + setup(); + + let (task, _tes) = &create_task().await.expect("Failed to create task"); + assert!(!task.id.is_empty(), "Task ID should not be empty"); // double check if it's a correct assertion + + let cancel = task.cancel().await; + 
assert!(cancel.is_ok()); + } + + #[tokio::test] + async fn test_list_task() { + setup(); + + let (task, tes) = &create_task().await.expect("Failed to create task"); + assert!(!task.id.is_empty(), "Task ID should not be empty"); // double check if it's a correct assertion + + let params: ListTasksParams = ListTasksParams { + name_prefix: None, + state: None, + tag_key: None, + tag_value: None, + page_size: None, + page_token: None, + view: Some("BASIC".to_string()), + }; + + let list = tes.list_tasks(Some(params)).await; + assert!(list.is_ok()); + println!("{:?}", list); + } +} diff --git a/lib/src/tes/model.rs b/lib/src/tes/model.rs new file mode 100644 index 0000000..a67190d --- /dev/null +++ b/lib/src/tes/model.rs @@ -0,0 +1,27 @@ +use crate::tes::models; + +/// struct for passing parameters to the method [`list_tasks`] +#[derive(Serialize, Clone, Debug)] +pub struct ListTasksParams { + /// OPTIONAL. Filter the list to include tasks where the name matches this prefix. If unspecified, no task name filtering is done. + #[serde(skip_serializing_if = "Option::is_none")] + pub name_prefix: Option, + /// OPTIONAL. Filter tasks by state. If unspecified, no task state filtering is done. + #[serde(skip_serializing_if = "Option::is_none")] + pub state: Option, + /// OPTIONAL. Provide key tag to filter. The field tag_key is an array of key values, and will be zipped with an optional tag_value array. So the query: ``` ?tag_key=foo1&tag_value=bar1&tag_key=foo2&tag_value=bar2 ``` Should be constructed into the structure { \"foo1\" : \"bar1\", \"foo2\" : \"bar2\"} ``` ?tag_key=foo1 ``` Should be constructed into the structure {\"foo1\" : \"\"} If the tag_value is empty, it will be treated as matching any possible value. If a tag value is provided, both the tag's key and value must be exact matches for a task to be returned. Filter Tags Match? 
---------------------------------------------------------------------- {\"foo\": \"bar\"} {\"foo\": \"bar\"} Yes {\"foo\": \"bar\"} {\"foo\": \"bat\"} No {\"foo\": \"\"} {\"foo\": \"\"} Yes {\"foo\": \"bar\", \"baz\": \"bat\"} {\"foo\": \"bar\", \"baz\": \"bat\"} Yes {\"foo\": \"bar\"} {\"foo\": \"bar\", \"baz\": \"bat\"} Yes {\"foo\": \"bar\", \"baz\": \"bat\"} {\"foo\": \"bar\"} No {\"foo\": \"\"} {\"foo\": \"bar\"} Yes {\"foo\": \"\"} {} No + #[serde(skip_serializing_if = "Option::is_none")] + pub tag_key: Option>, + /// OPTIONAL. The companion value field for tag_key + #[serde(skip_serializing_if = "Option::is_none")] + pub tag_value: Option>, + /// Optional number of tasks to return in one page. Must be less than 2048. Defaults to 256. + #[serde(skip_serializing_if = "Option::is_none")] + pub page_size: Option, + /// OPTIONAL. Page token is used to retrieve the next page of results. If unspecified, returns the first page of results. The value can be found in the `next_page_token` field of the last returned result of ListTasks + #[serde(skip_serializing_if = "Option::is_none")] + pub page_token: Option, + /// OPTIONAL. Affects the fields included in the returned Task messages. `MINIMAL`: Task message will include ONLY the fields: - `tesTask.Id` - `tesTask.State` `BASIC`: Task message will include all fields EXCEPT: - `tesTask.ExecutorLog.stdout` - `tesTask.ExecutorLog.stderr` - `tesInput.content` - `tesTaskLog.system_logs` `FULL`: Task message includes all fields. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub view: Option, +} diff --git a/lib/src/tes/models/mod.rs b/lib/src/tes/models/mod.rs new file mode 100644 index 0000000..232b43a --- /dev/null +++ b/lib/src/tes/models/mod.rs @@ -0,0 +1,34 @@ +pub mod service; +pub use self::service::Service; +pub mod service_organization; +pub use self::service_organization::ServiceOrganization; +pub mod service_type; +pub use self::service_type::ServiceType; +pub mod tes_create_task_response; +pub use self::tes_create_task_response::TesCreateTaskResponse; +pub mod tes_executor; +pub use self::tes_executor::TesExecutor; +pub mod tes_executor_log; +pub use self::tes_executor_log::TesExecutorLog; +pub mod tes_file_type; +pub use self::tes_file_type::TesFileType; +pub mod tes_input; +pub use self::tes_input::TesInput; +pub mod tes_list_tasks_response; +pub use self::tes_list_tasks_response::TesListTasksResponse; +pub mod tes_output; +pub use self::tes_output::TesOutput; +pub mod tes_output_file_log; +pub use self::tes_output_file_log::TesOutputFileLog; +pub mod tes_resources; +pub use self::tes_resources::TesResources; +pub mod tes_service_info; +pub use self::tes_service_info::TesServiceInfo; +pub mod tes_service_type; +pub use self::tes_service_type::TesServiceType; +pub mod tes_state; +pub use self::tes_state::TesState; +pub mod tes_task; +pub use self::tes_task::TesTask; +pub mod tes_task_log; +pub use self::tes_task_log::TesTaskLog; diff --git a/lib/src/tes/models/service.rs b/lib/src/tes/models/service.rs new file mode 100644 index 0000000..3c1c6cc --- /dev/null +++ b/lib/src/tes/models/service.rs @@ -0,0 +1,70 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. 
TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. 
### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// Service : GA4GH service +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct Service { + /// Unique ID of this service. Reverse domain name notation is recommended, though not required. The identifier should attempt to be globally unique so it can be used in downstream aggregator services e.g. Service Registry. + #[serde(rename = "id")] + pub id: String, + /// Name of this service. Should be human readable. + #[serde(rename = "name")] + pub name: String, + #[serde(rename = "type")] + pub r#type: Box, + /// Description of the service. Should be human readable and provide information about the service. + #[serde(rename = "description", skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(rename = "organization")] + pub organization: Box, + /// URL of the contact for the provider of this service, e.g. a link to a contact form (RFC 3986 format), or an email (RFC 2368 format). + #[serde(rename = "contactUrl", skip_serializing_if = "Option::is_none")] + pub contact_url: Option, + /// URL of the documentation of this service (RFC 3986 format). This should help someone learn how to use your service, including any specifics required to access data, e.g. authentication. 
+ #[serde(rename = "documentationUrl", skip_serializing_if = "Option::is_none")] + pub documentation_url: Option, + /// Timestamp describing when the service was first deployed and available (RFC 3339 format) + #[serde(rename = "createdAt", skip_serializing_if = "Option::is_none")] + pub created_at: Option, + /// Timestamp describing when the service was last updated (RFC 3339 format) + #[serde(rename = "updatedAt", skip_serializing_if = "Option::is_none")] + pub updated_at: Option, + /// Environment the service is running in. Use this to distinguish between production, development and testing/staging deployments. Suggested values are prod, test, dev, staging. However this is advised and not enforced. + #[serde(rename = "environment", skip_serializing_if = "Option::is_none")] + pub environment: Option, + /// Version of the service being described. Semantic versioning is recommended, but other identifiers, such as dates or commit hashes, are also allowed. The version should be changed whenever the service is updated. + #[serde(rename = "version")] + pub version: String, +} + +impl Service { + /// GA4GH service + pub fn new(id: String, name: String, r#type: models::ServiceType, organization: models::ServiceOrganization, version: String) -> Service { + Service { + id, + name, + r#type: Box::new(r#type), + description: None, + organization: Box::new(organization), + contact_url: None, + documentation_url: None, + created_at: None, + updated_at: None, + environment: None, + version, + } + } +} + diff --git a/lib/src/tes/models/service_organization.rs b/lib/src/tes/models/service_organization.rs new file mode 100644 index 0000000..796617c --- /dev/null +++ b/lib/src/tes/models/service_organization.rs @@ -0,0 +1,36 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. 
A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. 
Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// ServiceOrganization : Organization providing the service +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct ServiceOrganization { + /// Name of the organization responsible for the service + #[serde(rename = "name")] + pub name: String, + /// URL of the website of the organization (RFC 3986 format) + #[serde(rename = "url")] + pub url: String, +} + +impl ServiceOrganization { + /// Organization providing the service + pub fn new(name: String, url: String) -> ServiceOrganization { + ServiceOrganization { + name, + url, + } + } +} + diff --git a/lib/src/tes/models/service_type.rs b/lib/src/tes/models/service_type.rs new file mode 100644 index 0000000..100cd7f --- /dev/null +++ b/lib/src/tes/models/service_type.rs @@ -0,0 +1,40 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. 
## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. 
+ * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// ServiceType : Type of a GA4GH service +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct ServiceType { + /// Namespace in reverse domain name format. Use `org.ga4gh` for implementations compliant with official GA4GH specifications. For services with custom APIs not standardized by GA4GH, or implementations diverging from official GA4GH specifications, use a different namespace (e.g. your organization's reverse domain name). + #[serde(rename = "group")] + pub group: String, + /// Name of the API or GA4GH specification implemented. Official GA4GH types should be assigned as part of standards approval process. Custom artifacts are supported. + #[serde(rename = "artifact")] + pub artifact: String, + /// Version of the API or specification. GA4GH specifications use semantic versioning. + #[serde(rename = "version")] + pub version: String, +} + +impl ServiceType { + /// Type of a GA4GH service + pub fn new(group: String, artifact: String, version: String) -> ServiceType { + ServiceType { + group, + artifact, + version, + } + } +} + diff --git a/lib/src/tes/models/tes_create_task_response.rs b/lib/src/tes/models/tes_create_task_response.rs new file mode 100644 index 0000000..c1ed614 --- /dev/null +++ b/lib/src/tes/models/tes_create_task_response.rs @@ -0,0 +1,32 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. 
A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). 
Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesCreateTaskResponse : CreateTaskResponse describes a response from the CreateTask endpoint. It will include the task ID that can be used to look up the status of the job. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesCreateTaskResponse { + /// Task identifier assigned by the server. + #[serde(rename = "id")] + pub id: String, +} + +impl TesCreateTaskResponse { + /// CreateTaskResponse describes a response from the CreateTask endpoint. It will include the task ID that can be used to look up the status of the job. + pub fn new(id: String) -> TesCreateTaskResponse { + TesCreateTaskResponse { + id, + } + } +} + diff --git a/lib/src/tes/models/tes_executor.rs b/lib/src/tes/models/tes_executor.rs new file mode 100644 index 0000000..498c6e7 --- /dev/null +++ b/lib/src/tes/models/tes_executor.rs @@ -0,0 +1,60 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. 
It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. 
+ * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesExecutor : Executor describes a command to be executed, and its environment. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesExecutor { + /// Name of the container image. The string will be passed as the image argument to the containerization run command. Examples: - `ubuntu` - `quay.io/aptible/ubuntu` - `gcr.io/my-org/my-image` - `myregistryhost:5000/fedora/httpd:version1.0` + #[serde(rename = "image")] + pub image: String, + /// A sequence of program arguments to execute, where the first argument is the program to execute (i.e. argv). Example: ``` { \"command\" : [\"/bin/md5\", \"/data/file1\"] } ``` + #[serde(rename = "command")] + pub command: Vec, + /// The working directory that the command will be executed in. If not defined, the system will default to the directory set by the container image. + #[serde(rename = "workdir", skip_serializing_if = "Option::is_none")] + pub workdir: Option, + /// Path inside the container to a file which will be piped to the executor's stdin. This must be an absolute path. This mechanism could be used in conjunction with the input declaration to process a data file using a tool that expects STDIN. For example, to get the MD5 sum of a file by reading it into the STDIN ``` { \"command\" : [\"/bin/md5\"], \"stdin\" : \"/data/file1\" } ``` + #[serde(rename = "stdin", skip_serializing_if = "Option::is_none")] + pub stdin: Option, + /// Path inside the container to a file where the executor's stdout will be written to. Must be an absolute path. 
Example: ``` { \"stdout\" : \"/tmp/stdout.log\" } ``` + #[serde(rename = "stdout", skip_serializing_if = "Option::is_none")] + pub stdout: Option, + /// Path inside the container to a file where the executor's stderr will be written to. Must be an absolute path. Example: ``` { \"stderr\" : \"/tmp/stderr.log\" } ``` + #[serde(rename = "stderr", skip_serializing_if = "Option::is_none")] + pub stderr: Option, + /// Enviromental variables to set within the container. Example: ``` { \"env\" : { \"ENV_CONFIG_PATH\" : \"/data/config.file\", \"BLASTDB\" : \"/data/GRC38\", \"HMMERDB\" : \"/data/hmmer\" } } ``` + #[serde(rename = "env", skip_serializing_if = "Option::is_none")] + pub env: Option>, + /// Default behavior of running an array of executors is that execution stops on the first error. If `ignore_error` is `True`, then the runner will record error exit codes, but will continue on to the next tesExecutor. + #[serde(rename = "ignore_error", skip_serializing_if = "Option::is_none")] + pub ignore_error: Option, +} + +impl TesExecutor { + /// Executor describes a command to be executed, and its environment. + pub fn new(image: String, command: Vec) -> TesExecutor { + TesExecutor { + image, + command, + workdir: None, + stdin: None, + stdout: None, + stderr: None, + env: None, + ignore_error: None, + } + } +} + diff --git a/lib/src/tes/models/tes_executor_log.rs b/lib/src/tes/models/tes_executor_log.rs new file mode 100644 index 0000000..69c8bcb --- /dev/null +++ b/lib/src/tes/models/tes_executor_log.rs @@ -0,0 +1,48 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. 
A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). 
Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesExecutorLog : ExecutorLog describes logging information related to an Executor. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesExecutorLog { + /// Time the executor started, in RFC 3339 format. + #[serde(rename = "start_time", skip_serializing_if = "Option::is_none")] + pub start_time: Option, + /// Time the executor ended, in RFC 3339 format. + #[serde(rename = "end_time", skip_serializing_if = "Option::is_none")] + pub end_time: Option, + /// Stdout content. This is meant for convenience. No guarantees are made about the content. Implementations may chose different approaches: only the head, only the tail, a URL reference only, etc. In order to capture the full stdout client should set Executor.stdout to a container file path, and use Task.outputs to upload that file to permanent storage. + #[serde(rename = "stdout", skip_serializing_if = "Option::is_none")] + pub stdout: Option, + /// Stderr content. This is meant for convenience. No guarantees are made about the content. Implementations may chose different approaches: only the head, only the tail, a URL reference only, etc. In order to capture the full stderr client should set Executor.stderr to a container file path, and use Task.outputs to upload that file to permanent storage. + #[serde(rename = "stderr", skip_serializing_if = "Option::is_none")] + pub stderr: Option, + /// Exit code. + #[serde(rename = "exit_code")] + pub exit_code: i32, +} + +impl TesExecutorLog { + /// ExecutorLog describes logging information related to an Executor. 
+ pub fn new(exit_code: i32) -> TesExecutorLog { + TesExecutorLog { + start_time: None, + end_time: None, + stdout: None, + stderr: None, + exit_code, + } + } +} + diff --git a/lib/src/tes/models/tes_file_type.rs b/lib/src/tes/models/tes_file_type.rs new file mode 100644 index 0000000..87c2d58 --- /dev/null +++ b/lib/src/tes/models/tes_file_type.rs @@ -0,0 +1,41 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. 
It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesFileType : Define if input/output element is a file or a directory. It is not required that the user provide this value, but it is required that the server fill in the value once the information is avalible at run time. +/// Define if input/output element is a file or a directory. It is not required that the user provide this value, but it is required that the server fill in the value once the information is avalible at run time. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] +pub enum TesFileType { + #[serde(rename = "FILE")] + File, + #[serde(rename = "DIRECTORY")] + Directory, + +} + +impl std::fmt::Display for TesFileType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::File => write!(f, "FILE"), + Self::Directory => write!(f, "DIRECTORY"), + } + } +} + +impl Default for TesFileType { + fn default() -> TesFileType { + Self::File + } +} + diff --git a/lib/src/tes/models/tes_input.rs b/lib/src/tes/models/tes_input.rs new file mode 100644 index 0000000..6cd8488 --- /dev/null +++ b/lib/src/tes/models/tes_input.rs @@ -0,0 +1,53 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. 
A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesInput : Input describes Task input files. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesInput { + #[serde(rename = "name", skip_serializing_if = "Option::is_none")] + pub name: Option, + #[serde(rename = "description", skip_serializing_if = "Option::is_none")] + pub description: Option, + /// REQUIRED, unless \"content\" is set. 
URL in long term storage, for example: - s3://my-object-store/file1 - gs://my-bucket/file2 - file:///path/to/my/file - /path/to/my/file + #[serde(rename = "url", skip_serializing_if = "Option::is_none")] + pub url: Option, + /// Path of the file inside the container. Must be an absolute path. + #[serde(rename = "path")] + pub path: String, + #[serde(rename = "type", skip_serializing_if = "Option::is_none")] + pub r#type: Option, + /// File content literal. Implementations should support a minimum of 128 KiB in this field and may define their own maximum. UTF-8 encoded If content is not empty, \"url\" must be ignored. + #[serde(rename = "content", skip_serializing_if = "Option::is_none")] + pub content: Option, + /// Indicate that a file resource could be accessed using a streaming interface, ie a FUSE mounted s3 object. This flag indicates that using a streaming mount, as opposed to downloading the whole file to the local scratch space, may be faster despite the latency and overhead. This does not mean that the backend will use a streaming interface, as it may not be provided by the vendor, but if the capacity is avalible it can be used without degrading the performance of the underlying program. + #[serde(rename = "streamable", skip_serializing_if = "Option::is_none")] + pub streamable: Option, +} + +impl TesInput { + /// Input describes Task input files. + pub fn new(path: String) -> TesInput { + TesInput { + name: None, + description: None, + url: None, + path, + r#type: None, + content: None, + streamable: None, + } + } +} + diff --git a/lib/src/tes/models/tes_list_tasks_response.rs b/lib/src/tes/models/tes_list_tasks_response.rs new file mode 100644 index 0000000..158be6a --- /dev/null +++ b/lib/src/tes/models/tes_list_tasks_response.rs @@ -0,0 +1,36 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. 
A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. 
Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesListTasksResponse : ListTasksResponse describes a response from the ListTasks endpoint. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesListTasksResponse { + /// List of tasks. These tasks will be based on the original submitted task document, but with other fields, such as the job state and logging info, added/changed as the job progresses. + #[serde(rename = "tasks")] + pub tasks: Vec, + /// Token used to return the next page of results. This value can be used in the `page_token` field of the next ListTasks request. + #[serde(rename = "next_page_token", skip_serializing_if = "Option::is_none")] + pub next_page_token: Option, +} + +impl TesListTasksResponse { + /// ListTasksResponse describes a response from the ListTasks endpoint. + pub fn new(tasks: Vec) -> TesListTasksResponse { + TesListTasksResponse { + tasks, + next_page_token: None, + } + } +} + diff --git a/lib/src/tes/models/tes_output.rs b/lib/src/tes/models/tes_output.rs new file mode 100644 index 0000000..9835f72 --- /dev/null +++ b/lib/src/tes/models/tes_output.rs @@ -0,0 +1,51 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. 
A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. 
Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesOutput : Output describes Task output files. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesOutput { + /// User-provided name of output file + #[serde(rename = "name", skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Optional users provided description field, can be used for documentation. + #[serde(rename = "description", skip_serializing_if = "Option::is_none")] + pub description: Option, + /// URL at which the TES server makes the output accessible after the task is complete. When tesOutput.path contains wildcards, it must be a directory; see `tesOutput.path_prefix` for details on how output URLs are constructed in this case. For Example: - `s3://my-object-store/file1` - `gs://my-bucket/file2` - `file:///path/to/my/file` + #[serde(rename = "url")] + pub url: String, + /// Absolute path of the file inside the container. May contain pattern matching wildcards to select multiple outputs at once, but mind implications for `tesOutput.url` and `tesOutput.path_prefix`. 
Only wildcards defined in IEEE Std 1003.1-2017 (POSIX), 12.3 are supported; see https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 + #[serde(rename = "path")] + pub path: String, + /// Prefix to be removed from matching outputs if `tesOutput.path` contains wildcards; output URLs are constructed by appending pruned paths to the directory specfied in `tesOutput.url`. Required if `tesOutput.path` contains wildcards, ignored otherwise. + #[serde(rename = "path_prefix", skip_serializing_if = "Option::is_none")] + pub path_prefix: Option, + #[serde(rename = "type", skip_serializing_if = "Option::is_none")] + pub r#type: Option, +} + +impl TesOutput { + /// Output describes Task output files. + pub fn new(url: String, path: String) -> TesOutput { + TesOutput { + name: None, + description: None, + url, + path, + path_prefix: None, + r#type: None, + } + } +} + diff --git a/lib/src/tes/models/tes_output_file_log.rs b/lib/src/tes/models/tes_output_file_log.rs new file mode 100644 index 0000000..e31ac58 --- /dev/null +++ b/lib/src/tes/models/tes_output_file_log.rs @@ -0,0 +1,40 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. 
Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesOutputFileLog : OutputFileLog describes a single output file. 
This describes file details after the task has completed successfully, for logging purposes. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesOutputFileLog { + /// URL of the file in storage, e.g. s3://bucket/file.txt + #[serde(rename = "url")] + pub url: String, + /// Path of the file inside the container. Must be an absolute path. + #[serde(rename = "path")] + pub path: String, + /// Size of the file in bytes. Note, this is currently coded as a string because official JSON doesn't support int64 numbers. + #[serde(rename = "size_bytes")] + pub size_bytes: String, +} + +impl TesOutputFileLog { + /// OutputFileLog describes a single output file. This describes file details after the task has completed successfully, for logging purposes. + pub fn new(url: String, path: String, size_bytes: String) -> TesOutputFileLog { + TesOutputFileLog { + url, + path, + size_bytes, + } + } +} + diff --git a/lib/src/tes/models/tes_resources.rs b/lib/src/tes/models/tes_resources.rs new file mode 100644 index 0000000..0ab22ce --- /dev/null +++ b/lib/src/tes/models/tes_resources.rs @@ -0,0 +1,56 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. 
Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesResources : Resources describes the resources requested by a task. 
+#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesResources { + /// Requested number of CPUs + #[serde(rename = "cpu_cores", skip_serializing_if = "Option::is_none")] + pub cpu_cores: Option, + /// Define if the task is allowed to run on preemptible compute instances, for example, AWS Spot. This option may have no effect when utilized on some backends that don't have the concept of preemptible jobs. + #[serde(rename = "preemptible", skip_serializing_if = "Option::is_none")] + pub preemptible: Option, + /// Requested RAM required in gigabytes (GB) + #[serde(rename = "ram_gb", skip_serializing_if = "Option::is_none")] + pub ram_gb: Option, + /// Requested disk size in gigabytes (GB) + #[serde(rename = "disk_gb", skip_serializing_if = "Option::is_none")] + pub disk_gb: Option, + /// Request that the task be run in these compute zones. How this string is utilized will be dependent on the backend system. For example, a system based on a cluster queueing system may use this string to define priorty queue to which the job is assigned. + #[serde(rename = "zones", skip_serializing_if = "Option::is_none")] + pub zones: Option>, + /// Key/value pairs for backend configuration. ServiceInfo shall return a list of keys that a backend supports. Keys are case insensitive. It is expected that clients pass all runtime or hardware requirement key/values that are not mapped to existing tesResources properties to backend_parameters. Backends shall log system warnings if a key is passed that is unsupported. Backends shall not store or return unsupported keys if included in a task. If backend_parameters_strict equals true, backends should fail the task if any key/values are unsupported, otherwise, backends should attempt to run the task Intended uses include VM size selection, coprocessor configuration, etc. 
Example: ``` { \"backend_parameters\" : { \"VmSize\" : \"Standard_D64_v3\" } } ``` + #[serde(rename = "backend_parameters", skip_serializing_if = "Option::is_none")] + pub backend_parameters: Option>, + /// If set to true, backends should fail the task if any backend_parameters key/values are unsupported, otherwise, backends should attempt to run the task + #[serde(rename = "backend_parameters_strict", skip_serializing_if = "Option::is_none")] + pub backend_parameters_strict: Option, +} + +impl TesResources { + /// Resources describes the resources requested by a task. + pub fn new() -> TesResources { + TesResources { + cpu_cores: None, + preemptible: None, + ram_gb: None, + disk_gb: None, + zones: None, + backend_parameters: None, + backend_parameters_strict: None, + } + } +} + diff --git a/lib/src/tes/models/tes_service_info.rs b/lib/src/tes/models/tes_service_info.rs new file mode 100644 index 0000000..13157f1 --- /dev/null +++ b/lib/src/tes/models/tes_service_info.rs @@ -0,0 +1,76 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. 
Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesServiceInfo { + /// Unique ID of this service. Reverse domain name notation is recommended, though not required. 
The identifier should attempt to be globally unique so it can be used in downstream aggregator services e.g. Service Registry. + #[serde(rename = "id")] + pub id: String, + /// Name of this service. Should be human readable. + #[serde(rename = "name")] + pub name: String, + #[serde(rename = "type")] + pub r#type: Box, + /// Description of the service. Should be human readable and provide information about the service. + #[serde(rename = "description", skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(rename = "organization")] + pub organization: Box, + /// URL of the contact for the provider of this service, e.g. a link to a contact form (RFC 3986 format), or an email (RFC 2368 format). + #[serde(rename = "contactUrl", skip_serializing_if = "Option::is_none")] + pub contact_url: Option, + /// URL of the documentation of this service (RFC 3986 format). This should help someone learn how to use your service, including any specifics required to access data, e.g. authentication. + #[serde(rename = "documentationUrl", skip_serializing_if = "Option::is_none")] + pub documentation_url: Option, + /// Timestamp describing when the service was first deployed and available (RFC 3339 format) + #[serde(rename = "createdAt", skip_serializing_if = "Option::is_none")] + pub created_at: Option, + /// Timestamp describing when the service was last updated (RFC 3339 format) + #[serde(rename = "updatedAt", skip_serializing_if = "Option::is_none")] + pub updated_at: Option, + /// Environment the service is running in. Use this to distinguish between production, development and testing/staging deployments. Suggested values are prod, test, dev, staging. However this is advised and not enforced. + #[serde(rename = "environment", skip_serializing_if = "Option::is_none")] + pub environment: Option, + /// Version of the service being described. Semantic versioning is recommended, but other identifiers, such as dates or commit hashes, are also allowed. 
The version should be changed whenever the service is updated. + #[serde(rename = "version")] + pub version: String, + /// Lists some, but not necessarily all, storage locations supported by the service. + #[serde(rename = "storage", skip_serializing_if = "Option::is_none")] + pub storage: Option>, + /// Lists all tesResources.backend_parameters keys supported by the service + #[serde(rename = "tesResources_backend_parameters", skip_serializing_if = "Option::is_none")] + pub tes_resources_backend_parameters: Option>, +} + +impl TesServiceInfo { + pub fn new(id: String, name: String, r#type: models::TesServiceType, organization: models::ServiceOrganization, version: String) -> TesServiceInfo { + TesServiceInfo { + id, + name, + r#type: Box::new(r#type), + description: None, + organization: Box::new(organization), + contact_url: None, + documentation_url: None, + created_at: None, + updated_at: None, + environment: None, + version, + storage: None, + tes_resources_backend_parameters: None, + } + } +} + diff --git a/lib/src/tes/models/tes_service_type.rs b/lib/src/tes/models/tes_service_type.rs new file mode 100644 index 0000000..1c746bc --- /dev/null +++ b/lib/src/tes/models/tes_service_type.rs @@ -0,0 +1,49 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. 
It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. 
+ * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesServiceType { + /// Namespace in reverse domain name format. Use `org.ga4gh` for implementations compliant with official GA4GH specifications. For services with custom APIs not standardized by GA4GH, or implementations diverging from official GA4GH specifications, use a different namespace (e.g. your organization's reverse domain name). + #[serde(rename = "group")] + pub group: String, + #[serde(rename = "artifact")] + pub artifact: Artifact, + /// Version of the API or specification. GA4GH specifications use semantic versioning. + #[serde(rename = "version")] + pub version: String, +} + +impl TesServiceType { + pub fn new(group: String, artifact: Artifact, version: String) -> TesServiceType { + TesServiceType { + group, + artifact, + version, + } + } +} +/// +#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] +pub enum Artifact { + #[serde(rename = "tes")] + Tes, +} + +impl Default for Artifact { + fn default() -> Artifact { + Self::Tes + } +} + diff --git a/lib/src/tes/models/tes_state.rs b/lib/src/tes/models/tes_state.rs new file mode 100644 index 0000000..83d9ee7 --- /dev/null +++ b/lib/src/tes/models/tes_state.rs @@ -0,0 +1,68 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. 
A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). 
Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesState : Task state as defined by the server. - `UNKNOWN`: The state of the task is unknown. The cause for this status message may be dependent on the underlying system. The `UNKNOWN` states provides a safe default for messages where this field is missing so that a missing field does not accidentally imply that the state is QUEUED. - `QUEUED`: The task is queued and awaiting resources to begin computing. - `INITIALIZING`: The task has been assigned to a worker and is currently preparing to run. For example, the worker may be turning on, downloading input files, etc. - `RUNNING`: The task is running. Input files are downloaded and the first Executor has been started. - `PAUSED`: The task is paused. The reasons for this would be tied to the specific system running the job. An implementation may have the ability to pause a task, but this is not required. - `COMPLETE`: The task has completed running. Executors have exited without error and output files have been successfully uploaded. - `EXECUTOR_ERROR`: The task encountered an error in one of the Executor processes. Generally, this means that an Executor exited with a non-zero exit code. - `SYSTEM_ERROR`: The task was stopped due to a system error, but not from an Executor, for example an upload failed due to network issues, the worker's ran out of disk space, etc. - `CANCELED`: The task was canceled by the user, and downstream resources have been deleted. - `CANCELING`: The task was canceled by the user, but the downstream resources are still awaiting deletion. - `PREEMPTED`: The task is stopped (preempted) by the system. 
The reasons for this would be tied to the specific system running the job. Generally, this means that the system reclaimed the compute capacity for reallocation. +/// Task state as defined by the server. - `UNKNOWN`: The state of the task is unknown. The cause for this status message may be dependent on the underlying system. The `UNKNOWN` states provides a safe default for messages where this field is missing so that a missing field does not accidentally imply that the state is QUEUED. - `QUEUED`: The task is queued and awaiting resources to begin computing. - `INITIALIZING`: The task has been assigned to a worker and is currently preparing to run. For example, the worker may be turning on, downloading input files, etc. - `RUNNING`: The task is running. Input files are downloaded and the first Executor has been started. - `PAUSED`: The task is paused. The reasons for this would be tied to the specific system running the job. An implementation may have the ability to pause a task, but this is not required. - `COMPLETE`: The task has completed running. Executors have exited without error and output files have been successfully uploaded. - `EXECUTOR_ERROR`: The task encountered an error in one of the Executor processes. Generally, this means that an Executor exited with a non-zero exit code. - `SYSTEM_ERROR`: The task was stopped due to a system error, but not from an Executor, for example an upload failed due to network issues, the worker's ran out of disk space, etc. - `CANCELED`: The task was canceled by the user, and downstream resources have been deleted. - `CANCELING`: The task was canceled by the user, but the downstream resources are still awaiting deletion. - `PREEMPTED`: The task is stopped (preempted) by the system. The reasons for this would be tied to the specific system running the job. Generally, this means that the system reclaimed the compute capacity for reallocation. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] +pub enum TesState { + #[serde(rename = "UNKNOWN")] + Unknown, + #[serde(rename = "QUEUED")] + Queued, + #[serde(rename = "INITIALIZING")] + Initializing, + #[serde(rename = "RUNNING")] + Running, + #[serde(rename = "PAUSED")] + Paused, + #[serde(rename = "COMPLETE")] + Complete, + #[serde(rename = "EXECUTOR_ERROR")] + ExecutorError, + #[serde(rename = "SYSTEM_ERROR")] + SystemError, + #[serde(rename = "CANCELED")] + Canceled, + #[serde(rename = "PREEMPTED")] + Preempted, + #[serde(rename = "CANCELING")] + Canceling, + +} + +impl std::fmt::Display for TesState { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Unknown => write!(f, "UNKNOWN"), + Self::Queued => write!(f, "QUEUED"), + Self::Initializing => write!(f, "INITIALIZING"), + Self::Running => write!(f, "RUNNING"), + Self::Paused => write!(f, "PAUSED"), + Self::Complete => write!(f, "COMPLETE"), + Self::ExecutorError => write!(f, "EXECUTOR_ERROR"), + Self::SystemError => write!(f, "SYSTEM_ERROR"), + Self::Canceled => write!(f, "CANCELED"), + Self::Preempted => write!(f, "PREEMPTED"), + Self::Canceling => write!(f, "CANCELING"), + } + } +} + +impl Default for TesState { + fn default() -> TesState { + Self::Unknown + } +} + diff --git a/lib/src/tes/models/tes_task.rs b/lib/src/tes/models/tes_task.rs new file mode 100644 index 0000000..c5251be --- /dev/null +++ b/lib/src/tes/models/tes_task.rs @@ -0,0 +1,74 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. 
A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. ## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). 
Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesTask : Task describes an instance of a task. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesTask { + /// Task identifier assigned by the server. + #[serde(rename = "id", skip_serializing_if = "Option::is_none")] + pub id: Option, + #[serde(rename = "state", skip_serializing_if = "Option::is_none")] + pub state: Option, + /// User-provided task name. + #[serde(rename = "name", skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Optional user-provided description of task for documentation purposes. + #[serde(rename = "description", skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Input files that will be used by the task. Inputs will be downloaded and mounted into the executor container as defined by the task request document. + #[serde(rename = "inputs", skip_serializing_if = "Option::is_none")] + pub inputs: Option>, + /// Output files. Outputs will be uploaded from the executor container to long-term storage. + #[serde(rename = "outputs", skip_serializing_if = "Option::is_none")] + pub outputs: Option>, + #[serde(rename = "resources", skip_serializing_if = "Option::is_none")] + pub resources: Option>, + /// An array of executors to be run. Each of the executors will run one at a time sequentially. Each executor is a different command that will be run, and each can utilize a different docker image. But each of the executors will see the same mapped inputs and volumes that are declared in the parent CreateTask message. Execution stops on the first error. 
+ #[serde(rename = "executors")] + pub executors: Vec, + /// Volumes are directories which may be used to share data between Executors. Volumes are initialized as empty directories by the system when the task starts and are mounted at the same path in each Executor. For example, given a volume defined at `/vol/A`, executor 1 may write a file to `/vol/A/exec1.out.txt`, then executor 2 may read from that file. (Essentially, this translates to a `docker run -v` flag where the container path is the same for each executor). + #[serde(rename = "volumes", skip_serializing_if = "Option::is_none")] + pub volumes: Option>, + /// A key-value map of arbitrary tags. These can be used to store meta-data and annotations about a task. Example: ``` { \"tags\" : { \"WORKFLOW_ID\" : \"cwl-01234\", \"PROJECT_GROUP\" : \"alice-lab\" } } ``` + #[serde(rename = "tags", skip_serializing_if = "Option::is_none")] + pub tags: Option>, + /// Task logging information. Normally, this will contain only one entry, but in the case where a task fails and is retried, an entry will be appended to this list. + #[serde(rename = "logs", skip_serializing_if = "Option::is_none")] + pub logs: Option>, + /// Date + time the task was created, in RFC 3339 format. This is set by the system, not the client. + #[serde(rename = "creation_time", skip_serializing_if = "Option::is_none")] + pub creation_time: Option, +} + +impl TesTask { + /// Task describes an instance of a task. 
+ pub fn new(executors: Vec) -> TesTask { + TesTask { + id: None, + state: None, + name: None, + description: None, + inputs: None, + outputs: None, + resources: None, + executors, + volumes: None, + tags: None, + logs: None, + creation_time: None, + } + } +} + diff --git a/lib/src/tes/models/tes_task_log.rs b/lib/src/tes/models/tes_task_log.rs new file mode 100644 index 0000000..cc2c990 --- /dev/null +++ b/lib/src/tes/models/tes_task_log.rs @@ -0,0 +1,52 @@ +/* + * Task Execution Service + * + * ## Executive Summary The Task Execution Service (TES) API is a standardized schema and API for describing and executing batch execution tasks. A task defines a set of input files, a set of containers and commands to run, a set of output files and some other logging and metadata. TES servers accept task documents and execute them asynchronously on available compute resources. A TES server could be built on top of a traditional HPC queuing system, such as Grid Engine, Slurm or cloud style compute systems such as AWS Batch or Kubernetes. ## Introduction This document describes the TES API and provides details on the specific endpoints, request formats, and responses. It is intended to provide key information for developers of TES-compatible services as well as clients that will call these TES services. Use cases include: - Deploying existing workflow engines on new infrastructure. Workflow engines such as CWL-Tes and Cromwell have extentions for using TES. This will allow a system engineer to deploy them onto a new infrastructure using a job scheduling system not previously supported by the engine. - Developing a custom workflow management system. This API provides a common interface to asynchronous batch processing capabilities. A developer can write new tools against this interface and expect them to work using a variety of backend solutions that all support the same specification. 
## Standards The TES API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTP/HTTPS for information transport. HTTPS should be used rather than plain HTTP except for testing or internal-only purposes. ### Authentication and Authorization Is is envisaged that most TES API instances will require users to authenticate to use the endpoints. However, the decision if authentication is required should be taken by TES API implementers. If authentication is required, we recommend that TES implementations use an OAuth2 bearer token, although they can choose other mechanisms if appropriate. Checking that a user is authorized to submit TES requests is a responsibility of TES implementations. ### CORS If TES API implementation is to be used by another website or domain it must implement Cross Origin Resource Sharing (CORS). Please refer to https://w3id.org/ga4gh/product-approval-support/cors for more information about GA4GH’s recommendations and how to implement CORS. + * + * The version of the OpenAPI document: 1.1.0 + * + * Generated by: https://openapi-generator.tech + */ + +#![allow(unused_imports)] +#![allow(clippy::empty_docs)] +use crate::tes::models; +use serde::{Deserialize, Serialize}; + +/// TesTaskLog : TaskLog describes logging information related to a Task. +#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] +pub struct TesTaskLog { + /// Logs for each executor + #[serde(rename = "logs")] + pub logs: Vec, + /// Arbitrary logging metadata included by the implementation. + #[serde(rename = "metadata", skip_serializing_if = "Option::is_none")] + pub metadata: Option>, + /// When the task started, in RFC 3339 format. + #[serde(rename = "start_time", skip_serializing_if = "Option::is_none")] + pub start_time: Option, + /// When the task ended, in RFC 3339 format. 
+ #[serde(rename = "end_time", skip_serializing_if = "Option::is_none")] + pub end_time: Option, + /// Information about all output files. Directory outputs are flattened into separate items. + #[serde(rename = "outputs")] + pub outputs: Vec, + /// System logs are any logs the system decides are relevant, which are not tied directly to an Executor process. Content is implementation specific: format, size, etc. System logs may be collected here to provide convenient access. For example, the system may include the name of the host where the task is executing, an error message that caused a SYSTEM_ERROR state (e.g. disk is full), etc. System logs are only included in the FULL task view. + #[serde(rename = "system_logs", skip_serializing_if = "Option::is_none")] + pub system_logs: Option>, +} + +impl TesTaskLog { + /// TaskLog describes logging information related to a Task. + pub fn new(logs: Vec, outputs: Vec) -> TesTaskLog { + TesTaskLog { + logs, + metadata: None, + start_time: None, + end_time: None, + outputs, + system_logs: None, + } + } +} + diff --git a/lib/src/test_utils.rs b/lib/src/test_utils.rs new file mode 100644 index 0000000..62763f5 --- /dev/null +++ b/lib/src/test_utils.rs @@ -0,0 +1,32 @@ +use std::env; +use std::process::Command; +use std::str; +use std::sync::Once; + +pub const FUNNEL_HOST: &str = "http://localhost"; +pub const FUNNEL_PORT: u16 = 8000; +pub static INIT: Once = Once::new(); + +pub fn setup() { + INIT.call_once(|| { + env::set_var("RUST_LOG", "debug"); + env_logger::init(); + }); +} + +pub async fn ensure_funnel_running() -> String { + let output = Command::new("sh") + .arg("-c") + .arg("ps aux | grep '[f]unnel server run'") + .output() + .expect("Failed to execute command"); + + let output_str = str::from_utf8(&output.stdout).unwrap(); + + if output_str.is_empty() { + panic!("Funnel is not running."); + } + + let funnel_url = format!("{}:{}", FUNNEL_HOST, FUNNEL_PORT); + funnel_url +} diff --git a/lib/src/transport.rs 
b/lib/src/transport.rs new file mode 100644 index 0000000..569200b --- /dev/null +++ b/lib/src/transport.rs @@ -0,0 +1,228 @@ +/// A struct representing a transport for making HTTP requests. +/// +/// The `Transport` struct is responsible for handling HTTP requests using the `reqwest` crate. +/// It provides methods for making GET, POST, PUT, and DELETE requests. +/// +/// # Examples +/// +/// ``` +/// use crate::configuration::Configuration; +/// use crate::transport::Transport; +/// +/// let config = Configuration::new("Url::parse("https://api.example.com").unwrap() , None, None, None); +/// let transport = Transport::new(&config); +/// +/// // Make a GET request +/// let response = transport.get("/users", None).await; +/// +/// // Make a POST request +/// let data = serde_json::json!({"name": "John Doe", "age": 30}); +/// let response = transport.post("/users", Some(data)).await; +/// +/// // Make a PUT request +/// let data = serde_json::json!({"name": "John Doe", "age": 30}); +/// let response = transport.put("/users/1", data).await; +/// +/// // Make a DELETE request +/// let response = transport.delete("/users/1").await; +/// ``` +use crate::configuration::Configuration; +use log::error; +use reqwest::Client; +use serde_json::Value; +use std::error::Error; + +// note: could implement custom certs handling, such as in-TEE generated ephemerial certs +#[derive(Clone, Debug)] +pub struct Transport { + pub config: Configuration, + pub client: reqwest::Client, +} + +impl Transport { + + /// Creates a new `Transport` instance with the given configuration. + /// + /// # Arguments + /// + /// * `config` - The configuration for the transport. + /// + /// # Returns + /// + /// A new `Transport` instance. + pub fn new(config: &Configuration) -> Self { + Transport { + config: config.clone(), + client: Client::new(), + } + } + + /// Sends an HTTP request with the specified method, endpoint, data, and parameters. 
+ /// + /// # Arguments + /// + /// * `method` - The HTTP method for the request. + /// * `endpoint` - The endpoint for the request. + /// * `data` - The data to send with the request (optional). + /// * `params` - The query parameters for the request (optional). + /// + /// # Returns + /// + /// A `Result` containing the response body as a string, or an error if the request fails. + async fn request( + &self, + method: reqwest::Method, + endpoint: &str, + data: Option, + params: Option, + ) -> Result> { + let base_url = &self.config.base_path; + let url = base_url.join(endpoint).map_err(|e| { + error!("Invalid endpoint (shouldn't contain base url): {}. Error: {}", endpoint, e); + Box::new(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid endpoint")) as Box + })?; + + let mut request_builder = self.client.request(method, url); + + if let Some(ref user_agent) = self.config.user_agent { + request_builder = request_builder.header(reqwest::header::USER_AGENT, user_agent.clone()); + } + + if let Some(ref params_value) = params { + // Validate or log params_value before setting it as query parameters + if params_value.is_object() { + request_builder = request_builder.query(params_value); + } else { + error!("params_value is not an object and cannot be used as query parameters: {:?}", params_value); + return Err(Box::new(std::io::Error::new(std::io::ErrorKind::InvalidInput, "params_value must be an object"))); + } + } + + if let Some(ref data) = data { + if serde_json::to_string(&data).is_ok() { + request_builder = request_builder.json(&data); + } else { + log::error!("Parameters are invalid, and can't convert to JSON"); + } + } + + let resp = request_builder.send().await.map_err(|e| { + eprintln!("HTTP request failed: {}", e); + e + })?; + + let status = resp.status(); + let content = resp.text().await.map_err(|e| { + std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Failed to read response text: {}", e)) + })?; + + if status.is_success() { + 
Ok(content) + } else { + Err(Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Request failed with status: {}. Response: {}", status, content), + ))) + } + } + + /// Sends a GET request to the specified endpoint with the given query parameters. + /// + /// # Arguments + /// + /// * `endpoint` - The endpoint for the request. + /// * `params` - The query parameters for the request (optional). + /// + /// # Returns + /// + /// A `Result` containing the response body as a string, or an error if the request fails. + pub async fn get( + &self, + endpoint: &str, + params: Option, + ) -> Result> { + self.request(reqwest::Method::GET, endpoint, None, params) + .await + } + + /// Sends a POST request to the specified endpoint with the given data. + /// + /// # Arguments + /// + /// * `endpoint` - The endpoint for the request. + /// * `data` - The data to send with the request (optional). + /// + /// # Returns + /// + /// A `Result` containing the response body as a string, or an error if the request fails. + pub async fn post( + &self, + endpoint: &str, + data: Option, + ) -> Result> { + self.request(reqwest::Method::POST, endpoint, data, None) + .await + } + + /// Sends a PUT request to the specified endpoint with the given data. + /// + /// # Arguments + /// + /// * `endpoint` - The endpoint for the request. + /// * `data` - The data to send with the request. + /// + /// # Returns + /// + /// A `Result` containing the response body as a string, or an error if the request fails. + pub async fn put(&self, endpoint: &str, data: Value) -> Result> { + self.request(reqwest::Method::PUT, endpoint, Some(data), None) + .await + } + + /// Sends a DELETE request to the specified endpoint. + /// + /// # Arguments + /// + /// * `endpoint` - The endpoint for the request. + /// + /// # Returns + /// + /// A `Result` containing the response body as a string, or an error if the request fails. 
+ pub async fn delete(&self, endpoint: &str) -> Result> { + self.request(reqwest::Method::DELETE, endpoint, None, None) + .await + } + + // other HTTP methods can be added here +} + +#[cfg(test)] +mod tests { + use crate::configuration::Configuration; + use crate::transport::Transport; + use mockito::mock; + use url::Url; + + // effectively no sense in testing various responses, as it's reqwest's responsibility + // we should test Transport's methods instead + + #[tokio::test] + async fn test_request() { + let base_url_str = mockito::server_url(); + let base_url = Url::parse(&base_url_str).expect("Failed to parse mock server URL"); + + let _m = mock("GET", "/test") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"message": "success"}"#) + .create(); + + let config = Configuration::new(base_url.clone()); + let transport = Transport::new(&config.clone()); + let response = transport.get("/test", None).await; + + assert!(response.is_ok()); + let body = response.unwrap(); + assert_eq!(body, r#"{"message": "success"}"#); + } +} diff --git a/nextest.toml b/nextest.toml new file mode 100644 index 0000000..02d1453 --- /dev/null +++ b/nextest.toml @@ -0,0 +1,15 @@ +[profile.default] +retries = 1 + +[script-pre-commands] +[[profile.default]] +commands = [ + { cmd = "sh", args = ["-c", "if ! ps aux | grep '[f]unnel server run'; then echo 'Funnel server is not running. Starting it now...'; export PATH=$PATH:~/go/bin; funnel server run --Server.HostName=localhost --Server.HTTPPort=8000 > funnel.log 2>&1 & fi"] }, + { cmd = "sh", args = ["-c", "while ! 
curl -s http://localhost:8000/healthz > /dev/null; do echo 'Waiting for Funnel server...'; sleep 1; done; echo 'Funnel server is running.'"] } +] + +[script-post-commands] +[[profile.default]] +commands = [ + # Add any post-test teardown commands here if needed +] diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 0000000..4361c36 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,22 @@ +# Check if a "funnel" process is already running +if ! ps aux | grep '[f]unnel server run'; then + # If it's not running, start it + echo "Funnel server is not running. Starting it now..." + export PATH=$PATH:~/go/bin + funnel server run --Server.HostName=localhost --Server.HTTPPort=8000 > funnel.log 2>&1 & +else + echo "Funnel server is already running." +fi + +# Wait for the Funnel server to start +echo "Waiting for Funnel server to start..." +while ! curl -s http://localhost:8000/healthz > /dev/null +do + echo "Waiting for Funnel server..." + sleep 1 +done +echo "Funnel server is running." + +# Run the tests +RUST_BACKTRACE=1 RUST_LOG=debug cargo test + diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..62437b6 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,4 @@ +ignore = [ + "lib/serviceinfo/models", + "lib/tes/models", +] \ No newline at end of file diff --git a/tests/Readme.md b/tests/Readme.md new file mode 100644 index 0000000..21ba62f --- /dev/null +++ b/tests/Readme.md @@ -0,0 +1,7 @@ +# A folder for adding the files being used in unit tests + + +sample.tes: This is a sample file, taken from the [funnel docs](https://ohsu-comp-bio.github.io/funnel/docs/tasks/), and this file is being used in the file lib/src/tes/mod.rs + + +grape.tes: a sample file containing JSON task data for the GA4GH [Task Execution Service](https://github.com/ga4gh/task-execution-schemas), which can be used in the file lib/src/tes/mod.rs instead of sample.tes. 
Notably, it has placeholders like "${AWS_ACCESS_KEY_ID}" which is out of the standard and implies implementing a pre-processor, might be useful to note and implement in future as it avoids storing credentials in such .tes files diff --git a/tests/grape.tes b/tests/grape.tes new file mode 100644 index 0000000..2cceaf6 --- /dev/null +++ b/tests/grape.tes @@ -0,0 +1,85 @@ +{ + "name": "GRAPE", + "resources": { + "disk_gb": 200 + }, + "volumes": [ + "/vol/a/" + ], + "executors": [ + { + "image": "amazon/aws-cli", + "command": [ + "aws", + "s3", + "cp", + "${INPUT}", + "/vol/a/input.vcf.gz" + ], + "env": { + "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}", + "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}", + "AWS_REGION": "${AWS_REGION}" + } + }, + { + "image": "genxnetwork/grape", + "command": [ + "python", + "launcher.py", + "reference", + "--use-bundle", + "--ref-directory", + "/vol/a/media/ref", + "--real-run" + ] + }, + { + "image": "genxnetwork/grape", + "command": [ + "python", + "launcher.py", + "preprocess", + "--ref-directory", + "/vol/a/media/ref", + "--vcf-file", + "/vol/a/input.vcf.gz", + "--directory", + "/vol/a/media/data", + "--assembly", + "hg37", + "--real-run" + ] + }, + { + "image": "genxnetwork/grape", + "command": [ + "python", + "launcher.py", + "find", + "--flow", + "ibis", + "--ref-directory", + "/vol/a/media/ref", + "--directory", + "/vol/a/media/data", + "--real-run" + ] + }, + { + "image": "amazon/aws-cli", + "command": [ + "aws", + "s3", + "cp", + "/vol/a/media/data/results/relatives.tsv", + "${OUTPUT}" + ], + "env": { + "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}", + "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}", + "AWS_REGION": "${AWS_REGION}" + } + } + ] +} \ No newline at end of file diff --git a/tests/sample.tes b/tests/sample.tes new file mode 100644 index 0000000..e663c3a --- /dev/null +++ b/tests/sample.tes @@ -0,0 +1,16 @@ +{ + "name": "Hello world", + "inputs": [{ + "url": "s3://funnel-bucket/hello.txt", + "path": 
"/inputs/hello.txt" + }], + "outputs": [{ + "url": "s3://funnel-bucket/output.txt", + "path": "/outputs/stdout" + }], + "executors": [{ + "image": "alpine", + "command": ["cat", "/inputs/hello.txt"], + "stdout": "/outputs/stdout" + }] +}