Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Fix zookeeper quorum and port configuration #357

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,14 @@
- Use 0.0.0-dev product images for tests and examples ([#351])
- Use testing-tools 0.2.0 ([#351])

### Fixed

- Fix `hbase.zookeeper.quorum` so that it no longer contains the znode path; the znode path is passed via `zookeeper.znode.parent` instead ([#357]).
- Add `hbase.zookeeper.property.clientPort` setting, because hbase sometimes tried to access zookeeper with the wrong port ([#357]).
sbernauer marked this conversation as resolved.
Show resolved Hide resolved

[#349]: https://github.com/stackabletech/hbase-operator/pull/349
[#351]: https://github.com/stackabletech/hbase-operator/pull/351
[#357]: https://github.com/stackabletech/hbase-operator/pull/357

## [23.4.0] - 2023-04-17

Expand Down
1 change: 0 additions & 1 deletion rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ pub const HBASE_REST_OPTS: &str = "HBASE_REST_OPTS";

pub const HBASE_CLUSTER_DISTRIBUTED: &str = "hbase.cluster.distributed";
pub const HBASE_ROOTDIR: &str = "hbase.rootdir";
pub const HBASE_ZOOKEEPER_QUORUM: &str = "hbase.zookeeper.quorum";
pub const HBASE_HEAPSIZE: &str = "HBASE_HEAPSIZE";
pub const HBASE_ROOT_DIR_DEFAULT: &str = "/hbase";

Expand Down
25 changes: 15 additions & 10 deletions rust/operator-binary/src/discovery.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
use crate::hbase_controller::build_recommended_labels;
use stackable_hbase_crd::{HbaseCluster, HbaseRole, HBASE_SITE_XML, HBASE_ZOOKEEPER_QUORUM};
use std::collections::BTreeMap;

use crate::{
hbase_controller::build_recommended_labels, zookeeper::ZookeeperConnectionInformation,
};
use stackable_hbase_crd::{HbaseCluster, HbaseRole, HBASE_SITE_XML};
use stackable_operator::{
builder::{ConfigMapBuilder, ObjectMetaBuilder},
commons::product_image_selection::ResolvedProductImage,
error::OperatorResult,
k8s_openapi::api::core::v1::ConfigMap,
};
use std::collections::HashMap;

/// Creates a discovery config map containing the `hbase-site.xml` for clients.
pub fn build_discovery_configmap(
hbase: &HbaseCluster,
zookeeper_connect_string: &str,
zookeeper_connection_information: &ZookeeperConnectionInformation,
resolved_product_image: &ResolvedProductImage,
) -> OperatorResult<ConfigMap> {
let hbase_site_data: HashMap<String, Option<String>> = [(
HBASE_ZOOKEEPER_QUORUM.to_string(),
Some(zookeeper_connect_string.to_string()),
)]
.into();
let hbase_site = zookeeper_connection_information.as_hbase_settings();

ConfigMapBuilder::new()
.metadata(
Expand All @@ -35,7 +34,13 @@ pub fn build_discovery_configmap(
)
.add_data(
HBASE_SITE_XML,
stackable_operator::product_config::writer::to_hadoop_xml(hbase_site_data.iter()),
stackable_operator::product_config::writer::to_hadoop_xml(
hbase_site
.into_iter()
.map(|(k, v)| (k, Some(v)))
.collect::<BTreeMap<_, _>>()
.iter(),
),
)
.build()
}
61 changes: 23 additions & 38 deletions rust/operator-binary/src/hbase_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ use crate::{
product_logging::{
extend_role_group_config_map, resolve_vector_aggregator_address, LOG4J_CONFIG_FILE,
},
zookeeper::{self, ZookeeperConnectionInformation},
OPERATOR_NAME,
};

use snafu::{OptionExt, ResultExt, Snafu};
use stackable_hbase_crd::{
Container, HbaseCluster, HbaseClusterStatus, HbaseConfig, HbaseConfigFragment, HbaseRole,
APP_NAME, HBASE_ENV_SH, HBASE_HEAPSIZE, HBASE_MASTER_PORT, HBASE_REGIONSERVER_PORT,
HBASE_REST_PORT, HBASE_SITE_XML, HBASE_ZOOKEEPER_QUORUM, JVM_HEAP_FACTOR,
HBASE_REST_PORT, HBASE_SITE_XML, JVM_HEAP_FACTOR,
};
use stackable_operator::{
builder::{
Expand All @@ -38,7 +39,7 @@ use stackable_operator::{
},
apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString},
},
kube::{runtime::controller::Action, Resource, ResourceExt},
kube::{runtime::controller::Action, Resource},
labels::{role_group_selector_labels, role_selector_labels, ObjectLabels},
logging::controller::ReconcilerError,
memory::{BinaryMultiple, MemoryQuantity},
Expand Down Expand Up @@ -78,8 +79,6 @@ const HDFS_DISCOVERY_TMP_DIR: &str = "/stackable/tmp/hdfs";
const HBASE_CONFIG_TMP_DIR: &str = "/stackable/tmp/hbase";
const HBASE_LOG_CONFIG_TMP_DIR: &str = "/stackable/tmp/log_config";

const ZOOKEEPER_DISCOVERY_CM_ENTRY: &str = "ZOOKEEPER";

const DOCKER_IMAGE_BASE_NAME: &str = "hbase";

pub struct Ctx {
Expand Down Expand Up @@ -149,6 +148,8 @@ pub enum Error {
InvalidProductConfig {
source: stackable_operator::error::Error,
},
#[snafu(display("failed to retrieve zookeeper connection information"))]
RetrieveZookeeperConnectionInformation { source: zookeeper::Error },
#[snafu(display("object is missing metadata to build owner reference"))]
ObjectMissingMetadataForOwnerRef {
source: stackable_operator::error::Error,
Expand Down Expand Up @@ -220,26 +221,9 @@ pub async fn reconcile_hbase(hbase: Arc<HbaseCluster>, ctx: Arc<Ctx>) -> Result<
let client = &ctx.client;

let resolved_product_image = hbase.spec.image.resolve(DOCKER_IMAGE_BASE_NAME);

let zk_discovery_cm_name = &hbase.spec.cluster_config.zookeeper_config_map_name;
let zk_connect_string = client
.get::<ConfigMap>(
zk_discovery_cm_name,
hbase
.namespace()
.as_deref()
.context(ObjectHasNoNamespaceSnafu)?,
)
let zookeeper_connection_information = ZookeeperConnectionInformation::retrieve(&hbase, client)
.await
.context(MissingConfigMapSnafu {
cm_name: zk_discovery_cm_name.to_string(),
})?
.data
.and_then(|mut data| data.remove(ZOOKEEPER_DISCOVERY_CM_ENTRY))
.context(MissingConfigMapEntrySnafu {
entry: ZOOKEEPER_DISCOVERY_CM_ENTRY,
cm_name: zk_discovery_cm_name.to_string(),
})?;
.context(RetrieveZookeeperConnectionInformationSnafu)?;

let vector_aggregator_address = resolve_vector_aggregator_address(&hbase, client)
.await
Expand Down Expand Up @@ -274,9 +258,12 @@ pub async fn reconcile_hbase(hbase: Arc<HbaseCluster>, ctx: Arc<Ctx>) -> Result<
.context(ApplyRoleServiceSnafu)?;

// discovery config map
let discovery_cm =
build_discovery_configmap(&hbase, &zk_connect_string, &resolved_product_image)
.context(BuildDiscoveryConfigMapSnafu)?;
let discovery_cm = build_discovery_configmap(
&hbase,
&zookeeper_connection_information,
&resolved_product_image,
)
.context(BuildDiscoveryConfigMapSnafu)?;
cluster_resources
.add(client, discovery_cm)
.await
Expand Down Expand Up @@ -319,7 +306,7 @@ pub async fn reconcile_hbase(hbase: Arc<HbaseCluster>, ctx: Arc<Ctx>) -> Result<
&hbase,
&rolegroup,
rolegroup_config,
&zk_connect_string,
&zookeeper_connection_information,
&config,
&resolved_product_image,
vector_aggregator_address.as_deref(),
Expand Down Expand Up @@ -421,7 +408,7 @@ fn build_rolegroup_config_map(
hbase: &HbaseCluster,
rolegroup: &RoleGroupRef<HbaseCluster>,
rolegroup_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
zk_connect_string: &str,
zookeeper_connection_information: &ZookeeperConnectionInformation,
config: &HbaseConfig,
resolved_product_image: &ResolvedProductImage,
vector_aggregator_address: Option<&str>,
Expand All @@ -431,15 +418,7 @@ fn build_rolegroup_config_map(
.cloned()
.unwrap_or_default();

hbase_site_config.insert(
HBASE_ZOOKEEPER_QUORUM.to_string(),
zk_connect_string.to_string(),
);

let hbase_site_config = hbase_site_config
.into_iter()
.map(|(k, v)| (k, Some(v)))
.collect::<BTreeMap<_, _>>();
hbase_site_config.extend(zookeeper_connection_information.as_hbase_settings());

let mut hbase_env_config = rolegroup_config
.get(&PropertyNameKind::File(HBASE_ENV_SH.to_string()))
Expand Down Expand Up @@ -485,7 +464,13 @@ fn build_rolegroup_config_map(
)
.add_data(
HBASE_SITE_XML,
writer::to_hadoop_xml(hbase_site_config.iter()),
writer::to_hadoop_xml(
hbase_site_config
.into_iter()
.map(|(k, v)| (k, Some(v)))
.collect::<BTreeMap<_, _>>()
.iter(),
),
)
.add_data(HBASE_ENV_SH, write_hbase_env_sh(hbase_env_config.iter()));

Expand Down
1 change: 1 addition & 0 deletions rust/operator-binary/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod discovery;
mod hbase_controller;
mod product_logging;
mod zookeeper;

use crate::hbase_controller::HBASE_CONTROLLER_NAME;

Expand Down
127 changes: 127 additions & 0 deletions rust/operator-binary/src/zookeeper.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
use std::{collections::BTreeMap, num::ParseIntError};

use snafu::{OptionExt, ResultExt, Snafu};
use stackable_hbase_crd::HbaseCluster;
use stackable_operator::{
client::Client, k8s_openapi::api::core::v1::ConfigMap, kube::ResourceExt,
};
use strum::{EnumDiscriminants, IntoStaticStr};

// Keys of the entries read from the Zookeeper/ZNode discovery ConfigMap.
const ZOOKEEPER_DISCOVERY_CM_HOSTS_ENTRY: &str = "ZOOKEEPER_HOSTS";
const ZOOKEEPER_DISCOVERY_CM_CHROOT_ENTRY: &str = "ZOOKEEPER_CHROOT";
const ZOOKEEPER_DISCOVERY_CM_CLIENT_PORT_ENTRY: &str = "ZOOKEEPER_CLIENT_PORT";

// hbase-site.xml property names that the connection information is written to.
const HBASE_ZOOKEEPER_QUORUM: &str = "hbase.zookeeper.quorum";
const HBASE_ZOOKEEPER_PROPERTY_CLIENT_PORT: &str = "hbase.zookeeper.property.clientPort";
const ZOOKEEPER_ZNODE_PARENT: &str = "zookeeper.znode.parent";

/// Errors that can occur while retrieving and parsing the Zookeeper discovery ConfigMap.
#[derive(Snafu, Debug, EnumDiscriminants)]
#[strum_discriminants(derive(IntoStaticStr))]
#[allow(clippy::enum_variant_names)]
pub enum Error {
    /// The HbaseCluster object has no namespace set, so the ConfigMap cannot be looked up.
    #[snafu(display("object defines no namespace"))]
    ObjectHasNoNamespace,
    /// The discovery ConfigMap referenced by the cluster spec could not be fetched.
    #[snafu(display("failed to retrieve ConfigMap {cm_name}"))]
    MissingConfigMap {
        source: stackable_operator::error::Error,
        cm_name: String,
    },
    /// The ConfigMap was found but is missing one of the expected data entries.
    #[snafu(display("failed to retrieve the entry {entry} for ConfigMap {cm_name}"))]
    MissingConfigMapEntry { cm_name: String, entry: String },
    /// The client-port entry was present but was not a valid u16.
    #[snafu(display("failed to parse the zookeeper port from ConfigMap {cm_name} entry {entry}"))]
    ParseZookeeperPort {
        source: ParseIntError,
        cm_name: String,
        entry: String,
    },
}

// Module-local result alias defaulting to this module's Error type.
type Result<T, E = Error> = std::result::Result<T, E>;

/// Contains the information as exposed by the Zookeeper/Znode discovery CM (should work with both).
///
/// NOTE(review): the original scrape contained GitHub review-UI residue
/// ("marked this conversation as resolved" / "Show resolved Hide resolved")
/// embedded inside this struct body; it was not Rust and has been removed.
pub struct ZookeeperConnectionInformation {
    /// Comma-separated `host:port` pairs, e.g.
    /// `simple-zk-server-default-0.simple-zk-server-default.default.svc.cluster.local:2282,simple-zk-server-default-1.simple-zk-server-default.default.svc.cluster.local:2282,simple-zk-server-default-2.simple-zk-server-default.default.svc.cluster.local:2282`
    pub hosts: String,
    /// E.g. `/znode-123` in case of a ZNode discovery CM, or `/` in case of the Zookeeper discovery CM directly.
    pub chroot: String,
    /// E.g. 2282 for a tls-secured Zookeeper.
    pub port: u16,
}

impl ZookeeperConnectionInformation {
    /// Reads the Zookeeper connection details (hosts, chroot, client port) from the
    /// discovery ConfigMap referenced by `spec.clusterConfig.zookeeperConfigMapName`.
    ///
    /// # Errors
    /// * [`Error::ObjectHasNoNamespace`] if the HbaseCluster has no namespace
    /// * [`Error::MissingConfigMap`] if the ConfigMap cannot be fetched
    /// * [`Error::MissingConfigMapEntry`] if any expected entry is absent
    /// * [`Error::ParseZookeeperPort`] if the client-port entry is not a valid u16
    pub async fn retrieve(hbase: &HbaseCluster, client: &Client) -> Result<Self> {
        let zk_discovery_cm_name = &hbase.spec.cluster_config.zookeeper_config_map_name;
        let zk_discovery_cm = client
            .get::<ConfigMap>(
                zk_discovery_cm_name,
                hbase
                    .namespace()
                    .as_deref()
                    .context(ObjectHasNoNamespaceSnafu)?,
            )
            .await
            .context(MissingConfigMapSnafu {
                cm_name: zk_discovery_cm_name.to_string(),
            })?;

        // Take ownership of the data map once instead of re-borrowing it for every entry.
        let mut data = zk_discovery_cm.data.unwrap_or_default();

        let hosts = data
            .remove(ZOOKEEPER_DISCOVERY_CM_HOSTS_ENTRY)
            .context(MissingConfigMapEntrySnafu {
                cm_name: zk_discovery_cm_name.as_str(),
                entry: ZOOKEEPER_DISCOVERY_CM_HOSTS_ENTRY,
            })?;
        let chroot = data
            .remove(ZOOKEEPER_DISCOVERY_CM_CHROOT_ENTRY)
            .context(MissingConfigMapEntrySnafu {
                cm_name: zk_discovery_cm_name.as_str(),
                entry: ZOOKEEPER_DISCOVERY_CM_CHROOT_ENTRY,
            })?;
        let port = data
            .remove(ZOOKEEPER_DISCOVERY_CM_CLIENT_PORT_ENTRY)
            .context(MissingConfigMapEntrySnafu {
                cm_name: zk_discovery_cm_name.as_str(),
                entry: ZOOKEEPER_DISCOVERY_CM_CLIENT_PORT_ENTRY,
            })?
            .parse()
            .context(ParseZookeeperPortSnafu {
                cm_name: zk_discovery_cm_name.as_str(),
                entry: ZOOKEEPER_DISCOVERY_CM_CLIENT_PORT_ENTRY,
            })?;

        // IMPORTANT!
        // Before https://github.com/stackabletech/hbase-operator/issues/354, hbase automatically added a `/hbase` suffix, ending up with a chroot of e.g. `/znode-fe51edff-8df9-43a8-ac5f-4781b071ae5f/hbase`.
        // Because the chroot we read from the ZNode discovery CM is only `/znode-fe51edff-8df9-43a8-ac5f-4781b071ae5f`, we need to append the `/hbase` suffix ourselves.
        // If we don't do so, hbase clusters created before #354 would need to be migrated to a different znode path!
        //
        // Trim any trailing slash first: a plain Zookeeper discovery CM exposes a chroot of
        // `/`, and naively appending would yield `//hbase`, which is not a valid znode path
        // (ZooKeeper paths must not contain empty components).
        let chroot = format!("{}/hbase", chroot.trim_end_matches('/'));

        Ok(Self {
            hosts,
            chroot,
            port,
        })
    }

    /// Renders the connection information as hbase-site.xml properties.
    pub fn as_hbase_settings(&self) -> BTreeMap<String, String> {
        BTreeMap::from([
            // We use ZOOKEEPER_HOSTS (host1:port1,host2:port2) instead of ZOOKEEPER (host1:port1,host2:port2/znode-123) here, because HBase cannot deal with a chroot properly.
            // It is - in theory - a valid ZK connection string but HBase does its own parsing (last checked in HBase 2.5.3) which does not understand chroots properly.
            // It worked for us because we also provide a ZK port and that works but if the port is ever left out the parsing would break.
            // See https://github.com/stackabletech/hbase-operator/issues/354 for details
            (HBASE_ZOOKEEPER_QUORUM.to_string(), self.hosts.clone()),
            // As mentioned it's a good idea to pass this explicitly as well.
            // We had some cases where hbase tried to connect to zookeeper using the wrong (default) port.
            (
                HBASE_ZOOKEEPER_PROPERTY_CLIENT_PORT.to_string(),
                self.port.to_string(),
            ),
            // As we only pass in the hosts (and not the znode) in the zookeeper quorum, we need to specify the znode path
            (ZOOKEEPER_ZNODE_PARENT.to_string(), self.chroot.clone()),
        ])
    }
}