diff --git a/library/cpp/grpc/server/grpc_request.h b/library/cpp/grpc/server/grpc_request.h index bb389af2769b..f1beef02b764 100644 --- a/library/cpp/grpc/server/grpc_request.h +++ b/library/cpp/grpc/server/grpc_request.h @@ -179,6 +179,11 @@ class TGRpcRequestImpl return Request_; } + NProtoBuf::Message* GetRequestMut() override { + return Request_; + } + + TAuthState& GetAuthState() override { return AuthState_; } diff --git a/library/cpp/grpc/server/grpc_request_base.h b/library/cpp/grpc/server/grpc_request_base.h index fcfce1c181ac..26afa80c842f 100644 --- a/library/cpp/grpc/server/grpc_request_base.h +++ b/library/cpp/grpc/server/grpc_request_base.h @@ -53,6 +53,9 @@ class IRequestContextBase: public TThrRefBase { //! Get pointer to the request's message. virtual const NProtoBuf::Message* GetRequest() const = 0; + //! Get mutable pointer to the request's message. + virtual NProtoBuf::Message* GetRequestMut() = 0; + //! Get current auth state virtual TAuthState& GetAuthState() = 0; diff --git a/ydb/core/client/server/msgbus_server_s3_listing.cpp b/ydb/core/client/server/msgbus_server_s3_listing.cpp index e021ffee1282..94542d8d7371 100644 --- a/ydb/core/client/server/msgbus_server_s3_listing.cpp +++ b/ydb/core/client/server/msgbus_server_s3_listing.cpp @@ -747,8 +747,13 @@ class TS3ListingRequestGrpc : protected TMessageConverter, public NMsgBusProxy:: } GrpcRequest->ReplyWithYdbStatus(grpcStatus); } else { - Ydb::S3Internal::S3ListingResult grpcResult = TMessageConverter::ConvertResult(msgbusResponse); - GrpcRequest->SendResult(grpcResult, Ydb::StatusIds::SUCCESS); + try { + Ydb::S3Internal::S3ListingResult grpcResult = TMessageConverter::ConvertResult(msgbusResponse); + GrpcRequest->SendResult(grpcResult, Ydb::StatusIds::SUCCESS); + } catch (const std::exception& ex) { // catch by const reference: catching by value slices derived exceptions down to std::exception and loses their what() text + GrpcRequest->RaiseIssue(NYql::ExceptionToIssue(ex)); + GrpcRequest->ReplyWithYdbStatus(Ydb::StatusIds::INTERNAL_ERROR); + } } } }; diff --git a/ydb/core/cms/CMakeLists.txt 
b/ydb/core/cms/CMakeLists.txt index 90da238fba00..9f9a311e127e 100644 --- a/ydb/core/cms/CMakeLists.txt +++ b/ydb/core/cms/CMakeLists.txt @@ -93,10 +93,10 @@ target_link_libraries(ydb-core-cms.global PUBLIC tools-enum_parser-enum_serialization_runtime ) target_sources(ydb-core-cms.global PRIVATE - ${CMAKE_BINARY_DIR}/ydb/core/cms/bda6b2eb0fe591794e0191b6befe2972.cpp + ${CMAKE_BINARY_DIR}/ydb/core/cms/117417182f73c245478c9903130ed8f2.cpp ) resources(ydb-core-cms.global - ${CMAKE_BINARY_DIR}/ydb/core/cms/bda6b2eb0fe591794e0191b6befe2972.cpp + ${CMAKE_BINARY_DIR}/ydb/core/cms/117417182f73c245478c9903130ed8f2.cpp INPUTS ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/index.html ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/cms.css @@ -124,6 +124,8 @@ resources(ydb-core-cms.global ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/res/remove.png ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/validators.js ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/sentinel_state.js + ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/nanotable.js + ${CMAKE_SOURCE_DIR}/ydb/core/cms/ui/sentinel.css KEYS cms/ui/index.html cms/ui/cms.css @@ -151,4 +153,6 @@ resources(ydb-core-cms.global cms/ui/res/remove.png cms/ui/validators.js cms/ui/sentinel_state.js + cms/ui/nanotable.js + cms/ui/sentinel.css ) diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index 6698252bdb95..329455896480 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -1052,7 +1052,7 @@ void TCms::Die(const TActorContext& ctx) TActorBase::Die(ctx); } -void TCms::AddHostState(const TNodeInfo &node, TClusterStateResponse &resp, TInstant timestamp) +void TCms::AddHostState(const TClusterInfoPtr &clusterInfo, const TNodeInfo &node, TClusterStateResponse &resp, TInstant timestamp) { auto *host = resp.MutableState()->AddHosts(); host->SetName(node.Host); @@ -1060,6 +1060,7 @@ void TCms::AddHostState(const TNodeInfo &node, TClusterStateResponse &resp, TIns host->SetNodeId(node.NodeId); host->SetInterconnectPort(node.IcPort); host->SetTimestamp(timestamp.GetValue()); + 
node.Location.Serialize(host->MutableLocation(), false); if (node.State == UP || node.VDisks || node.PDisks) { for (const auto flag : GetEnumAllValues()) { if (!(node.Services & flag)) { @@ -1076,7 +1077,7 @@ void TCms::AddHostState(const TNodeInfo &node, TClusterStateResponse &resp, TIns } for (const auto &vdId : node.VDisks) { - const auto &vdisk = ClusterInfo->VDisk(vdId); + const auto &vdisk = clusterInfo->VDisk(vdId); auto *device = host->AddDevices(); device->SetName(vdisk.GetDeviceName()); device->SetState(vdisk.State); @@ -1084,7 +1085,7 @@ void TCms::AddHostState(const TNodeInfo &node, TClusterStateResponse &resp, TIns } for (const auto &pdId : node.PDisks) { - const auto &pdisk = ClusterInfo->PDisk(pdId); + const auto &pdisk = clusterInfo->PDisk(pdId); auto *device = host->AddDevices(); device->SetName(pdisk.GetDeviceName()); device->SetState(pdisk.State); @@ -1602,7 +1603,7 @@ void TCms::Handle(TEvCms::TEvClusterStateRequest::TPtr &ev, for (const auto &host : rec.GetHosts()) { if (ClusterInfo->NodesCount(host) >= 1) { for (const TNodeInfo *node : ClusterInfo->HostNodes(host)) { - AddHostState(*node, resp->Record, ClusterInfo->GetTimestamp()); + AddHostState(ClusterInfo, *node, resp->Record, ClusterInfo->GetTimestamp()); } } else { return ReplyWithError( @@ -1611,7 +1612,7 @@ void TCms::Handle(TEvCms::TEvClusterStateRequest::TPtr &ev, } } else { for (const auto &entry : ClusterInfo->AllNodes()) - AddHostState(*entry.second, resp->Record, ClusterInfo->GetTimestamp()); + AddHostState(ClusterInfo, *entry.second, resp->Record, ClusterInfo->GetTimestamp()); } resp->Record.MutableStatus()->SetCode(TStatus::OK); diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index a4e69f0b8ff8..a662c8371af1 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -75,6 +75,8 @@ class TCms : public TActor, public TTabletExecutedFlat { void PersistNodeTenants(TTransactionContext& txc, const TActorContext& ctx); + static void AddHostState(const 
TClusterInfoPtr &clusterInfo, const TNodeInfo &node, NKikimrCms::TClusterStateResponse &resp, TInstant timestamp); + private: using TActorBase = TActor; using EStatusCode = NKikimrCms::TStatus::ECode; @@ -355,7 +357,6 @@ class TCms : public TActor, public TTabletExecutedFlat { void Cleanup(const TActorContext &ctx); void Die(const TActorContext& ctx) override; - void AddHostState(const TNodeInfo &node, NKikimrCms::TClusterStateResponse &resp, TInstant timestamp); void GetPermission(TEvCms::TEvManagePermissionRequest::TPtr &ev, bool all, const TActorContext &ctx); void RemovePermission(TEvCms::TEvManagePermissionRequest::TPtr &ev, bool done, const TActorContext &ctx); void GetRequest(TEvCms::TEvManageRequestRequest::TPtr &ev, bool all, const TActorContext &ctx); diff --git a/ydb/core/cms/console/console__create_tenant.cpp b/ydb/core/cms/console/console__create_tenant.cpp index 2f8676de3575..469261f25a25 100644 --- a/ydb/core/cms/console/console__create_tenant.cpp +++ b/ydb/core/cms/console/console__create_tenant.cpp @@ -126,7 +126,7 @@ class TTenantsManager::TTxCreateTenant : public TTransactionBaseIsExternalSubdomain = Self->FeatureFlags.GetEnableExternalSubdomains(); Tenant->IsExternalHive = Self->FeatureFlags.GetEnableExternalHive(); - Tenant->IsExternalSysViewProcessor = Self->FeatureFlags.GetEnablePersistentQueryStats(); + Tenant->IsExternalSysViewProcessor = Self->FeatureFlags.GetEnableSystemViews(); if (rec.options().disable_external_subdomain()) { Tenant->IsExternalSubdomain = false; diff --git a/ydb/core/cms/json_proxy_sentinel.h b/ydb/core/cms/json_proxy_sentinel.h index 7520765e0685..cc069fe26a39 100644 --- a/ydb/core/cms/json_proxy_sentinel.h +++ b/ydb/core/cms/json_proxy_sentinel.h @@ -17,6 +17,49 @@ class TJsonProxySentinel : public TJsonProxyCms PrepareRequest(const TActorContext &) override { TAutoPtr request = new TRequest; + const TCgiParameters& cgi = RequestEvent->Get()->Request.GetParams(); + + if (cgi.Has("show")) { + 
if (NKikimrCms::TGetSentinelStateRequest::EShow show; NKikimrCms::TGetSentinelStateRequest::EShow_Parse(cgi.Get("show"), &show)) { // set the filter only when the name parses; an unknown name used to forward an uninitialized value + request->Record.SetShow(show); + } + } + + if (cgi.Has("range")) { + TVector> ranges; + auto rangesStr = cgi.Get("range"); + TVector strRanges; + StringSplitter(rangesStr).Split(',').Collect(&strRanges); + for (auto& strRange : strRanges) { + ui32 begin = 0; + ui32 end = 0; + if (!StringSplitter(strRange).Split('-').TryCollectInto(&begin, &end)) { + if (TryFromString(strRange, begin)) { + end = begin; + } else { + break; // TODO + } + } + ranges.push_back({begin, end}); + } + sort(ranges.begin(), ranges.end()); + // Coalesce the sorted ranges into disjoint ones with strictly increasing Begin. + // The receiver keys ranges by Begin, so two ranges sharing a Begin would + // silently drop one of them. + // An empty vector (nothing parsed) skips the loop, so end() is never dereferenced. + for (auto it = ranges.begin(); it != ranges.end();) { + auto current = *it++; + // '>=' rather than '>' also folds ranges that touch or start at the same node id. + while (it != ranges.end() && current.second >= it->first) { + current.second = std::max(current.second, it->second); + ++it; + } + // Emit the coalesced range. + auto* newRange = request->Record.AddRanges(); + newRange->SetBegin(current.first); + newRange->SetEnd(current.second); + } + } return request; } diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index 20289839192d..e7ccde90a9a5 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -19,8 +19,7 @@ #include #include -namespace NKikimr { -namespace NCms { +namespace NKikimr::NCms { #if defined LOG_T || \ defined LOG_D || \ @@ -188,14 +187,14 @@ void TPDiskInfo::AddState(EPDiskState state) { /// TClusterMap -TClusterMap::TClusterMap(TCmsStatePtr state) +TClusterMap::TClusterMap(TSentinelState::TPtr state) : State(state) -{} +{ +} void TClusterMap::AddPDisk(const TPDiskID& id) { - Y_VERIFY(State->ClusterInfo->HasNode(id.NodeId)); - Y_VERIFY(State->ClusterInfo->HasPDisk(id)); - const auto& location = State->ClusterInfo->Node(id.NodeId).Location; + Y_VERIFY(State->Nodes.contains(id.NodeId)); + const auto& location = State->Nodes[id.NodeId].Location; 
ByDataCenter[location.HasKey(TNodeLocation::TKeys::DataCenter) ? location.GetDataCenterId() : ""].insert(id); ByRoom[location.HasKey(TNodeLocation::TKeys::Module) ? location.GetModuleId() : ""].insert(id); @@ -205,7 +204,7 @@ void TClusterMap::AddPDisk(const TPDiskID& id) { /// TGuardian -TGuardian::TGuardian(TCmsStatePtr state, ui32 dataCenterRatio, ui32 roomRatio, ui32 rackRatio) +TGuardian::TGuardian(TSentinelState::TPtr state, ui32 dataCenterRatio, ui32 roomRatio, ui32 rackRatio) : TClusterMap(state) , DataCenterRatio(dataCenterRatio) , RoomRatio(roomRatio) @@ -271,13 +270,6 @@ TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TSt return result; } -/// Main state -struct TSentinelState: public TSimpleRefCount { - using TPtr = TIntrusivePtr; - - TMap PDisks; -}; - /// Actors template @@ -326,8 +318,8 @@ class TUpdaterBase: public TSentinelChildBase { : TSentinelChildBase(parent, cmsState) , SentinelState(sentinelState) { - for (auto& pdisk : SentinelState->PDisks) { - pdisk.second.ClearTouched(); + for (auto& [_, info] : SentinelState->PDisks) { + info->ClearTouched(); } } @@ -337,14 +329,44 @@ class TUpdaterBase: public TSentinelChildBase { }; // TUpdaterBase class TConfigUpdater: public TUpdaterBase { - void Retry() { - ++Attempt; - Schedule(Config.RetryUpdateConfig, new TEvSentinel::TEvRetry()); + enum class RetryCookie { + BSC, + CMS, + }; + + void MaybeReply() { + if (SentinelState->ConfigUpdaterState.GotBSCResponse && SentinelState->ConfigUpdaterState.GotCMSResponse) { + Reply(); + } + } + + void RetryBSC() { + ++SentinelState->ConfigUpdaterState.BSCAttempt; + Schedule(Config.RetryUpdateConfig, new TEvents::TEvWakeup(static_cast(RetryCookie::BSC))); + } + + void RetryCMS() { + ++SentinelState->ConfigUpdaterState.CMSAttempt; + Schedule(Config.RetryUpdateConfig, new TEvents::TEvWakeup(static_cast(RetryCookie::CMS))); + } + + void OnRetry(TEvents::TEvWakeup::TPtr& ev) { + const auto* msg = ev->Get(); + switch (static_cast(msg->Tag)) 
{ + case RetryCookie::BSC: + RequestBSConfig(); + break; + case RetryCookie::CMS: + RequestCMSClusterState(); + break; + default: + Y_FAIL("Unexpected case"); + } } void RequestBSConfig() { LOG_D("Request blobstorage config" - << ": attempt# " << Attempt); + << ": attempt# " << SentinelState->ConfigUpdaterState.BSCAttempt); if (!CmsState->BSControllerPipe) { ConnectBSC(); @@ -355,6 +377,46 @@ class TConfigUpdater: public TUpdaterBaseBSControllerPipe, request.Release()); } + void RequestCMSClusterState() { + LOG_D("Request CMS cluster state" + << ": attempt# " << SentinelState->ConfigUpdaterState.CMSAttempt); + // We aren't tracking delivery due to invariant that CMS always kills sentinel when dies itself + Send(CmsState->CmsActorId, new TEvCms::TEvClusterStateRequest()); + } + + void Handle(TEvCms::TEvClusterStateResponse::TPtr& ev) { + const auto& record = ev->Get()->Record; + + LOG_D("Handle TEvCms::TEvClusterStateResponse" + << ": response# " << record.ShortDebugString()); + + if (!record.HasStatus() || !record.GetStatus().HasCode() || record.GetStatus().GetCode() != NKikimrCms::TStatus::OK) { + TString error = ""; + if (record.HasStatus() && record.GetStatus().HasCode() && record.GetStatus().HasReason()) { + error = NKikimrCms::TStatus::ECode_Name(record.GetStatus().GetCode()) + " " + record.GetStatus().GetReason(); + } + + LOG_E("Unsuccesful response from CMS" + << ", error# " << error); + + RetryCMS(); + + return; + } + + if (record.HasState()) { + SentinelState->Nodes.clear(); + for (ui32 i = 0; i < record.GetState().HostsSize(); ++i) { + const auto& host = record.GetState().GetHosts(i); + if (host.HasNodeId() && host.HasLocation() && host.HasName()) { + SentinelState->Nodes.emplace(host.GetNodeId(), TNodeInfo{host.GetName(), NActors::TNodeLocation(host.GetLocation())}); + } + } + } + SentinelState->ConfigUpdaterState.GotCMSResponse = true; + MaybeReply(); + } + void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { const auto& response = 
ev->Get()->Record.GetResponse(); @@ -370,7 +432,7 @@ class TConfigUpdater: public TUpdaterBasePDisks; @@ -378,20 +440,22 @@ class TConfigUpdater: public TUpdaterBaseTouch(); continue; } - pdisks.emplace(id, TPDiskInfo(pdisk.GetDriveStatus(), Config.DefaultStateLimit, Config.StateLimits)); + pdisks.emplace(id, new TPDiskInfo(pdisk.GetDriveStatus(), Config.DefaultStateLimit, Config.StateLimits)); } - Reply(); + SentinelState->ConfigUpdaterState.GotBSCResponse = true; + + MaybeReply(); } } void OnPipeDisconnected() { LOG_E("Pipe to BSC disconnected"); - Retry(); + RetryBSC(); } public: @@ -407,24 +471,28 @@ class TConfigUpdater: public TUpdaterBasePrevConfigUpdaterState = SentinelState->ConfigUpdaterState; + SentinelState->ConfigUpdaterState.Clear(); + TActor::PassAway(); + } + + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { - cFunc(TEvSentinel::TEvRetry::EventType, RequestBSConfig); - cFunc(TEvSentinel::TEvBSCPipeDisconnected::EventType, OnPipeDisconnected); + hFunc(TEvents::TEvWakeup, OnRetry); + sFunc(TEvSentinel::TEvBSCPipeDisconnected, OnPipeDisconnected); + + hFunc(TEvCms::TEvClusterStateResponse, Handle); hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - cFunc(TEvents::TEvPoisonPill::EventType, PassAway); + sFunc(TEvents::TEvPoisonPill, PassAway); } } - -private: - ui32 Attempt = 0; - }; // TConfigUpdater class TStateUpdater: public TUpdaterBase { @@ -454,17 +522,17 @@ class TStateUpdater: public TUpdaterBaseStateUpdaterWaitNodes.find(nodeId); + if (it == SentinelState->StateUpdaterWaitNodes.end()) { return false; } - WaitNodes.erase(it); + SentinelState->StateUpdaterWaitNodes.erase(it); return true; } void MaybeReply() { - if (WaitNodes) { + if (SentinelState->StateUpdaterWaitNodes) { return; } @@ -474,13 +542,13 @@ class TStateUpdater: public TUpdaterBasePDisks.lower_bound(TPDiskID(nodeId, 0)); while (it != SentinelState->PDisks.end() && it->first.NodeId == nodeId) { - if (skipTouched && it->second.IsTouched()) { + if (skipTouched && 
it->second->IsTouched()) { ++it; continue; } - Y_VERIFY(!it->second.IsTouched()); - it->second.AddState(state); + Y_VERIFY(!it->second->IsTouched()); + it->second->AddState(state); ++it; } } @@ -526,7 +594,7 @@ class TStateUpdater: public TUpdaterBasesecond.AddState(safeState); + it->second->AddState(safeState); } MarkNodePDisks(nodeId, NKikimrBlobStorage::TPDiskState::Missing, true); @@ -573,8 +641,8 @@ class TStateUpdater: public TUpdaterBaseStateUpdaterWaitNodes) { + const ui32 nodeId = *SentinelState->StateUpdaterWaitNodes.begin(); MarkNodePDisks(nodeId, NKikimrBlobStorage::TPDiskState::Timeout); AcceptNodeReply(nodeId); @@ -595,30 +663,31 @@ class TStateUpdater: public TUpdaterBasePDisks) { - if (WaitNodes.insert(pdisk.first.NodeId).second) { - RequestPDiskState(pdisk.first.NodeId); + for (const auto& [id, _] : SentinelState->PDisks) { + if (SentinelState->StateUpdaterWaitNodes.insert(id.NodeId).second) { + RequestPDiskState(id.NodeId); } } Become(&TThis::StateWork, Config.UpdateStateTimeout, new TEvSentinel::TEvTimeout()); } - STFUNC(StateWork) { - Y_UNUSED(ctx); + void PassAway() override { + SentinelState->StateUpdaterWaitNodes.clear(); + TActor::PassAway(); + } + + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { - cFunc(TEvSentinel::TEvTimeout::EventType, TimedOut); + sFunc(TEvSentinel::TEvTimeout, TimedOut); hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); hFunc(TEvents::TEvUndelivered, Handle); - cFunc(TEvents::TEvPoisonPill::EventType, PassAway); + sFunc(TEvents::TEvPoisonPill, PassAway); } } -private: - THashSet WaitNodes; - }; // TStateUpdater class TStatusChanger: public TSentinelChildBase { @@ -628,7 +697,7 @@ class TStatusChanger: public TSentinelChildBase { } void MaybeRetry() { - if (Attempt++ < Config.ChangeStatusRetries) { + if (Info->StatusChangerState->Attempt++ < Config.ChangeStatusRetries) { Schedule(Config.RetryChangeStatus, new TEvSentinel::TEvRetry()); } else { Reply(false); @@ -638,8 +707,8 @@ class TStatusChanger: public 
TSentinelChildBase { void RequestStatusChange() { LOG_D("Change pdisk status" << ": pdiskId# " << Id - << ", status# " << Status - << ", attempt# " << Attempt); + << ", status# " << Info->StatusChangerState->Status + << ", attempt# " << Info->StatusChangerState->Attempt); if (!CmsState->BSControllerPipe) { ConnectBSC(); @@ -649,7 +718,7 @@ class TStatusChanger: public TSentinelChildBase { auto& command = *request->Record.MutableRequest()->AddCommand()->MutableUpdateDriveStatus(); command.MutableHostKey()->SetNodeId(Id.NodeId); command.SetPDiskId(Id.DiskId); - command.SetStatus(Status); + command.SetStatus(Info->StatusChangerState->Status); NTabletPipe::SendData(SelfId(), CmsState->BSControllerPipe, request.Release()); } @@ -684,6 +753,13 @@ class TStatusChanger: public TSentinelChildBase { return NKikimrServices::TActivity::CMS_SENTINEL_STATUS_CHANGER_ACTOR; } + void PassAway() override { + Info->LastStatusChange = Now(); + Info->PrevStatusChangerState = Info->StatusChangerState; + Info->StatusChangerState.Reset(); + TActor::PassAway(); + } + static TStringBuf Name() { return "StatusChanger"sv; } @@ -692,11 +768,13 @@ class TStatusChanger: public TSentinelChildBase { const TActorId& parent, TCmsStatePtr state, const TPDiskID& id, + TPDiskInfo::TPtr info, NKikimrBlobStorage::EDriveStatus status) : TBase(parent, state) , Id(id) - , Status(status) + , Info(info) { + info->StatusChangerState = new TStatusChangerState(status); } void Bootstrap() { @@ -704,24 +782,20 @@ class TStatusChanger: public TSentinelChildBase { Become(&TThis::StateWork); } - STFUNC(StateWork) { - Y_UNUSED(ctx); + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { - cFunc(TEvSentinel::TEvRetry::EventType, RequestStatusChange); - cFunc(TEvSentinel::TEvBSCPipeDisconnected::EventType, OnPipeDisconnected); + sFunc(TEvSentinel::TEvRetry, RequestStatusChange); + sFunc(TEvSentinel::TEvBSCPipeDisconnected, OnPipeDisconnected); hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - 
cFunc(TEvents::TEvPoisonPill::EventType, PassAway); + sFunc(TEvents::TEvPoisonPill, PassAway); } } private: const TPDiskID Id; - const NKikimrBlobStorage::EDriveStatus Status; - - ui32 Attempt = 0; - + TPDiskInfo::TPtr Info; }; // TStatusChanger class TSentinel: public TActorBootstrapped { @@ -749,13 +823,24 @@ class TSentinel: public TActorBootstrapped { } }; - struct TUpdaterInfo { + struct TUpdaterState { TActorId Id; TInstant StartedAt; bool Delayed; + void Clear() { + Id = TActorId(); + StartedAt = TInstant::Zero(); + Delayed = false; + } + }; + + struct TUpdaterInfo: public TUpdaterState { + TUpdaterState PrevState; + TUpdaterInfo() { - Clear(); + PrevState.Clear(); + TUpdaterState::Clear(); } void Start(const TActorId& id, const TInstant& now) { @@ -765,9 +850,8 @@ class TSentinel: public TActorBootstrapped { } void Clear() { - Id = TActorId(); - StartedAt = TInstant::Zero(); - Delayed = false; + PrevState = *this; + TUpdaterState::Clear(); } }; @@ -808,13 +892,13 @@ class TSentinel: public TActorBootstrapped { void RemoveUntouched() { EraseNodesIf(SentinelState->PDisks, [](const auto& kv) { - return !kv.second.IsTouched(); + return !kv.second->IsTouched(); }); } void EnsureAllTouched() const { Y_VERIFY(AllOf(SentinelState->PDisks, [](const auto& kv) { - return kv.second.IsTouched(); + return kv.second->IsTouched(); })); } @@ -827,8 +911,8 @@ class TSentinel: public TActorBootstrapped { action.SetCurrentStatus(status); action.SetRequiredStatus(requiredStatus); - Y_VERIFY(CmsState->ClusterInfo->HasNode(id.NodeId)); - action.SetHost(CmsState->ClusterInfo->Node(id.NodeId).Host); + Y_VERIFY(SentinelState->Nodes.contains(id.NodeId)); + action.SetHost(SentinelState->Nodes[id.NodeId].Host); if (reason) { action.SetReason(reason); @@ -863,7 +947,7 @@ class TSentinel: public TActorBootstrapped { EnsureAllTouched(); - if (!CmsState->ClusterInfo) { + if (SentinelState->Nodes.empty()) { LOG_C("Missing cluster info"); ScheduleUpdate( StateUpdater, 
Config.UpdateStateInterval, ConfigUpdater @@ -872,26 +956,20 @@ class TSentinel: public TActorBootstrapped { return; } - TClusterMap all(CmsState); - TGuardian changed(CmsState, Config.DataCenterRatio, Config.RoomRatio, Config.RackRatio); + TClusterMap all(SentinelState); + TGuardian changed(SentinelState, Config.DataCenterRatio, Config.RoomRatio, Config.RackRatio); TClusterMap::TPDiskIDSet alwaysAllowed; for (auto& pdisk : SentinelState->PDisks) { const TPDiskID& id = pdisk.first; - TPDiskInfo& info = pdisk.second; + TPDiskInfo& info = *(pdisk.second); - if (!CmsState->ClusterInfo->HasNode(id.NodeId)) { + if (!SentinelState->Nodes.contains(id.NodeId)) { LOG_E("Missing node info" << ": pdiskId# " << id); continue; } - if (!CmsState->ClusterInfo->HasPDisk(id)) { - LOG_E("Missing pdisk info" - << ": pdiskId# " << id); - continue; - } - all.AddPDisk(id); if (info.IsChanged()) { if (info.IsNewStatusGood()) { @@ -906,26 +984,26 @@ class TSentinel: public TActorBootstrapped { TString issues; THashSet disallowed; - TClusterMap::TPDiskIDSet allowed = changed.GetAllowedPDisks(all, issues, disallowed); - Copy(alwaysAllowed.begin(), alwaysAllowed.end(), std::inserter(allowed, allowed.begin())); - for (const TPDiskID& id : allowed) { + std::move(alwaysAllowed.begin(), alwaysAllowed.end(), std::inserter(allowed, allowed.begin())); + + for (const auto& id : allowed) { Y_VERIFY(SentinelState->PDisks.contains(id)); - TPDiskInfo& info = SentinelState->PDisks.at(id); + TPDiskInfo::TPtr info = SentinelState->PDisks.at(id); - if (!info.IsChangingAllowed()) { - info.AllowChanging(); + if (!info->IsChangingAllowed()) { + info->AllowChanging(); continue; } - if (info.StatusChanger) { + if (info->StatusChanger) { continue; } - const EPDiskStatus status = info.GetStatus(); + const EPDiskStatus status = info->GetStatus(); TString reason; - info.ApplyChanges(reason); - const EPDiskStatus requiredStatus = info.GetStatus(); + info->ApplyChanges(reason); + const EPDiskStatus requiredStatus = 
info->GetStatus(); LOG_N("PDisk status changed" << ": pdiskId# " << id @@ -936,14 +1014,14 @@ class TSentinel: public TActorBootstrapped { LogStatusChange(id, status, requiredStatus, reason); if (!Config.DryRun) { - info.StatusChanger = Register(new TStatusChanger(SelfId(), CmsState, id, requiredStatus)); + info->StatusChanger = RegisterWithSameMailbox(new TStatusChanger(SelfId(), CmsState, id, info, requiredStatus)); (*Counters->PDisksPendingChange)++; } } - for (const TPDiskID& id : disallowed) { + for (const auto& id : disallowed) { Y_VERIFY(SentinelState->PDisks.contains(id)); - SentinelState->PDisks.at(id).DisallowChanging(); + SentinelState->PDisks.at(id)->DisallowChanging(); } if (issues) { @@ -955,29 +1033,105 @@ class TSentinel: public TActorBootstrapped { ); } - void Handle(TEvCms::TEvGetSentinelStateRequest::TPtr& ev, const TActorContext &ctx) { - THolder Response; - Response = MakeHolder(); - auto &rec = Response->Record; - rec.MutableStatus()->SetCode(NKikimrCms::TStatus::OK); + void Handle(TEvCms::TEvGetSentinelStateRequest::TPtr& ev) { + const auto& reqRecord = ev->Get()->Record; + + auto show = NKikimrCms::TGetSentinelStateRequest::UNHEALTHY; + + if (reqRecord.HasShow()) { + show = reqRecord.GetShow(); + } + + TMap ranges = {{1, 20}}; + + if (reqRecord.RangesSize() > 0) { + ranges.clear(); + for (size_t i = 0; i < reqRecord.RangesSize(); i++) { + auto range = reqRecord.GetRanges(i); + if (range.HasBegin() && range.HasEnd()) { + ranges.emplace(range.GetBegin(), range.GetEnd()); + } + } + } + + auto checkRanges = [&](ui32 NodeId) { + auto next = ranges.upper_bound(NodeId); + if (next != ranges.begin()) { + --next; + return next->second >= NodeId; + } + + return false; + }; + + auto filterByStatus = [](const TPDiskInfo& info, NKikimrCms::TGetSentinelStateRequest::EShow filter) { + switch(filter) { + case NKikimrCms::TGetSentinelStateRequest::UNHEALTHY: + return info.GetState() != NKikimrBlobStorage::TPDiskState::Normal || info.GetStatus() != 
EPDiskStatus::ACTIVE; + case NKikimrCms::TGetSentinelStateRequest::SUSPICIOUS: + return info.GetState() != NKikimrBlobStorage::TPDiskState::Normal + || info.GetStatus() != EPDiskStatus::ACTIVE + || info.StatusChangerState + || !info.IsTouched() + || !info.IsChangingAllowed(); + default: + return true; + } + }; + + auto response = MakeHolder(); + + auto& record = response->Record; + record.MutableStatus()->SetCode(NKikimrCms::TStatus::OK); + Config.Serialize(*record.MutableSentinelConfig()); - auto& sentinelConfig = *rec.MutableSentinelConfig(); - Config.Serialize(sentinelConfig); + auto serializeUpdater = [](const auto& updater, auto* out){ + out->SetActorId(updater.Id.ToString()); + out->SetStartedAt(updater.StartedAt.ToString()); + out->SetDelayed(updater.Delayed); + }; if (SentinelState) { - for (auto it = SentinelState->PDisks.begin(); it != SentinelState->PDisks.end(); ++it) { - auto &entry = *rec.AddPDisks(); - entry.MutableId()->SetNodeId(it->first.NodeId); - entry.MutableId()->SetDiskId(it->first.DiskId); - entry.MutableInfo()->SetState(it->second.GetState()); - entry.MutableInfo()->SetPrevState(it->second.GetPrevState()); - entry.MutableInfo()->SetStateCounter(it->second.GetStateCounter()); - entry.MutableInfo()->SetStatus(it->second.GetStatus()); - entry.MutableInfo()->SetChangingAllowed(it->second.IsChangingAllowed()); - entry.MutableInfo()->SetTouched(it->second.IsTouched()); + auto& stateUpdater = *record.MutableStateUpdater(); + serializeUpdater(StateUpdater, stateUpdater.MutableUpdaterInfo()); + serializeUpdater(StateUpdater.PrevState, stateUpdater.MutablePrevUpdaterInfo()); + for (const auto& waitNode : SentinelState->StateUpdaterWaitNodes) { + stateUpdater.AddWaitNodes(waitNode); + } + + auto& configUpdater = *record.MutableConfigUpdater(); + serializeUpdater(ConfigUpdater, configUpdater.MutableUpdaterInfo()); + serializeUpdater(ConfigUpdater.PrevState, configUpdater.MutablePrevUpdaterInfo()); + 
configUpdater.SetBSCAttempt(SentinelState->ConfigUpdaterState.BSCAttempt); + configUpdater.SetPrevBSCAttempt(SentinelState->PrevConfigUpdaterState.BSCAttempt); + configUpdater.SetCMSAttempt(SentinelState->ConfigUpdaterState.CMSAttempt); + configUpdater.SetPrevCMSAttempt(SentinelState->PrevConfigUpdaterState.CMSAttempt); + + for (const auto& [id, info] : SentinelState->PDisks) { + if (filterByStatus(*info, show) && checkRanges(id.NodeId)) { + auto& entry = *record.AddPDisks(); + entry.MutableId()->SetNodeId(id.NodeId); + entry.MutableId()->SetDiskId(id.DiskId); + entry.MutableInfo()->SetState(info->GetState()); + entry.MutableInfo()->SetPrevState(info->GetPrevState()); + entry.MutableInfo()->SetStateCounter(info->GetStateCounter()); + entry.MutableInfo()->SetStatus(info->GetStatus()); + entry.MutableInfo()->SetChangingAllowed(info->IsChangingAllowed()); + entry.MutableInfo()->SetTouched(info->IsTouched()); + entry.MutableInfo()->SetLastStatusChange(info->LastStatusChange.ToString()); + if (info->StatusChangerState) { + entry.MutableInfo()->SetDesiredStatus(info->StatusChangerState->Status); + entry.MutableInfo()->SetStatusChangeAttempts(info->StatusChangerState->Attempt); + } + if (info->PrevStatusChangerState) { + entry.MutableInfo()->SetPrevDesiredStatus(info->PrevStatusChangerState->Status); + entry.MutableInfo()->SetPrevStatusChangeAttempts(info->PrevStatusChangerState->Attempt); + } + } } } - ctx.Send(ev->Sender, Response.Release()); + + Send(ev->Sender, std::move(response)); } void Handle(TEvSentinel::TEvStatusChanged::TPtr& ev) { @@ -1005,7 +1159,7 @@ class TSentinel: public TActorBootstrapped { (*Counters->PDisksChanged)++; } - it->second.StatusChanger = TActorId(); + it->second->StatusChanger = TActorId(); } void OnPipeDisconnected() { @@ -1013,8 +1167,8 @@ class TSentinel: public TActorBootstrapped { Send(actor, new TEvSentinel::TEvBSCPipeDisconnected()); } - for (const auto& pdisk : SentinelState->PDisks) { - if (const TActorId& actor = 
pdisk.second.StatusChanger) { + for (const auto& [_, info] : SentinelState->PDisks) { + if (const TActorId& actor = info->StatusChanger) { Send(actor, new TEvSentinel::TEvBSCPipeDisconnected()); } } @@ -1029,8 +1183,8 @@ class TSentinel: public TActorBootstrapped { Send(actor, new TEvents::TEvPoisonPill()); } - for (const auto& pdisk : SentinelState->PDisks) { - if (const TActorId& actor = pdisk.second.StatusChanger) { + for (const auto& [_, info] : SentinelState->PDisks) { + if (const TActorId& actor = info->StatusChanger) { Send(actor, new TEvents::TEvPoisonPill()); } } @@ -1064,18 +1218,17 @@ class TSentinel: public TActorBootstrapped { Become(&TThis::StateWork); } - STFUNC(StateWork) { - Y_UNUSED(ctx); + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { - cFunc(TEvSentinel::TEvUpdateConfig::EventType, UpdateConfig); - cFunc(TEvSentinel::TEvConfigUpdated::EventType, OnConfigUpdated); - cFunc(TEvSentinel::TEvUpdateState::EventType, UpdateState); - cFunc(TEvSentinel::TEvStateUpdated::EventType, OnStateUpdated); + sFunc(TEvSentinel::TEvUpdateConfig, UpdateConfig); + sFunc(TEvSentinel::TEvConfigUpdated, OnConfigUpdated); + sFunc(TEvSentinel::TEvUpdateState, UpdateState); + sFunc(TEvSentinel::TEvStateUpdated, OnStateUpdated); hFunc(TEvSentinel::TEvStatusChanged, Handle); - HFunc(TEvCms::TEvGetSentinelStateRequest, Handle); - cFunc(TEvSentinel::TEvBSCPipeDisconnected::EventType, OnPipeDisconnected); + hFunc(TEvCms::TEvGetSentinelStateRequest, Handle); + sFunc(TEvSentinel::TEvBSCPipeDisconnected, OnPipeDisconnected); - cFunc(TEvents::TEvPoisonPill::EventType, PassAway); + sFunc(TEvents::TEvPoisonPill, PassAway); } } @@ -1096,5 +1249,4 @@ IActor* CreateSentinel(TCmsStatePtr state) { return new NSentinel::TSentinel(state); } -} // NCms -} // NKikimr +} // NKikimr::NCms diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 1ae78ff822c3..00029ed6169a 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -71,8 +71,27 @@ 
class TPDiskStatus: public TPDiskStatusComputer { }; // TPDiskStatus -struct TPDiskInfo: public TPDiskStatus { +struct TStatusChangerState: public TSimpleRefCount { + using TPtr = TIntrusivePtr; + + explicit TStatusChangerState(NKikimrBlobStorage::EDriveStatus status) + : Status(status) + {} + + const NKikimrBlobStorage::EDriveStatus Status; + ui32 Attempt = 0; +}; // TStatusChangerState + +struct TPDiskInfo + : public TSimpleRefCount + , public TPDiskStatus +{ + using TPtr = TIntrusivePtr; + TActorId StatusChanger; + TInstant LastStatusChange; + TStatusChangerState::TPtr StatusChangerState; + TStatusChangerState::TPtr PrevStatusChangerState; explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& stateLimits); @@ -84,22 +103,50 @@ struct TPDiskInfo: public TPDiskStatus { private: bool Touched; - }; // TPDiskInfo +struct TNodeInfo { + TString Host; + NActors::TNodeLocation Location; +}; + +struct TConfigUpdaterState { + ui32 BSCAttempt = 0; + ui32 CMSAttempt = 0; + bool GotBSCResponse = false; + bool GotCMSResponse = false; + + void Clear() { + *this = TConfigUpdaterState{}; + } +}; + +/// Main state +struct TSentinelState: public TSimpleRefCount { + using TPtr = TIntrusivePtr; + + using TNodeId = ui32; + + TMap PDisks; + TMap Nodes; + THashSet StateUpdaterWaitNodes; + TConfigUpdaterState ConfigUpdaterState; + TConfigUpdaterState PrevConfigUpdaterState; +}; + class TClusterMap { public: using TPDiskIDSet = THashSet; using TDistribution = THashMap; using TNodeIDSet = THashSet; - TCmsStatePtr State; + TSentinelState::TPtr State; TDistribution ByDataCenter; TDistribution ByRoom; TDistribution ByRack; THashMap NodeByRack; - TClusterMap(TCmsStatePtr state); + TClusterMap(TSentinelState::TPtr state); void AddPDisk(const TPDiskID& id); }; // TClusterMap @@ -114,7 +161,7 @@ class TGuardian : public TClusterMap { } public: - explicit TGuardian(TCmsStatePtr state, ui32 dataCenterRatio = 100, ui32 roomRatio = 100, ui32 rackRatio = 
100); + explicit TGuardian(TSentinelState::TPtr state, ui32 dataCenterRatio = 100, ui32 roomRatio = 100, ui32 rackRatio = 100); TPDiskIDSet GetAllowedPDisks(const TClusterMap& all, TString& issues, TPDiskIDSet& disallowed) const; diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp index 0ad3a65f221c..0e1f33ae94db 100644 --- a/ydb/core/cms/sentinel_ut.cpp +++ b/ydb/core/cms/sentinel_ut.cpp @@ -1,6 +1,7 @@ #include "cms_ut_common.h" #include "sentinel.h" #include "sentinel_impl.h" +#include "cms_impl.h" #include @@ -136,7 +137,8 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { } } - TCmsStatePtr MockCmsState(ui16 numDataCenter, ui16 racksPerDataCenter, ui16 nodesPerRack, ui16 pdisksPerNode, bool anyDC, bool anyRack) { + std::pair MockCmsState(ui16 numDataCenter, ui16 racksPerDataCenter, ui16 nodesPerRack, ui16 pdisksPerNode, bool anyDC, bool anyRack) { + TSentinelState::TPtr sentinelState = new TSentinelState; TCmsStatePtr state = new TCmsState; state->ClusterInfo = new TClusterInfo; @@ -156,6 +158,7 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { location.SetUnit(ToString(id)); state->ClusterInfo->AddNode(TEvInterconnect::TNodeInfo(id, name, name, name, 10000, TNodeLocation(location)), nullptr); + sentinelState->Nodes[id] = NSentinel::TNodeInfo{name, NActors::TNodeLocation(location)}; for (ui64 npdisk : xrange(pdisksPerNode)) { NKikimrBlobStorage::TBaseConfig::TPDisk pdisk; @@ -168,16 +171,16 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { } } - return state; + return {state, sentinelState}; } void GuardianDataCenterRatio(ui16 numDataCenter, const TVector& nodesPerDataCenterVariants, bool anyDC = false) { UNIT_ASSERT(!anyDC || numDataCenter == 1); for (ui16 nodesPerDataCenter : nodesPerDataCenterVariants) { - TCmsStatePtr state = MockCmsState(numDataCenter, nodesPerDataCenter, 1, 1, anyDC, false); - TGuardian all(state); - TGuardian changed(state, 50); + auto [state, sentinelState] = MockCmsState(numDataCenter, nodesPerDataCenter, 1, 1, anyDC, false); + 
TGuardian all(sentinelState); + TGuardian changed(sentinelState, 50); THashSet changedSet; const auto& nodes = state->ClusterInfo->AllNodes(); @@ -233,10 +236,10 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { void GuardianRackRatio(ui16 numRacks, const TVector& nodesPerRackVariants, ui16 numPDisks, bool anyRack) { for (ui16 nodesPerRack : nodesPerRackVariants) { - TCmsStatePtr state = MockCmsState(1, numRacks, nodesPerRack, numPDisks, false, anyRack); + auto [state, sentinelState] = MockCmsState(1, numRacks, nodesPerRack, numPDisks, false, anyRack); - TGuardian all(state); - TGuardian changed(state, 100, 100, 50); + TGuardian all(sentinelState); + TGuardian changed(sentinelState, 100, 100, 50); THashSet changedSet; const auto& nodes = state->ClusterInfo->AllNodes(); @@ -371,9 +374,27 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { return true; } }); + auto prevObserver = SetObserverFunc(&TTestActorRuntimeBase::DefaultObserverFunc); + SetObserverFunc([this, prevObserver](TTestActorRuntimeBase& runtime, + TAutoPtr &event){ + if (event->GetTypeRewrite() == TEvCms::TEvClusterStateRequest::EventType) { + TAutoPtr resp = new TEvCms::TEvClusterStateResponse; + if (State) { + resp->Record.MutableStatus()->SetCode(NKikimrCms::TStatus::OK); + for (const auto &entry : State->ClusterInfo->AllNodes()) { + NCms::TCms::AddHostState(State->ClusterInfo, *entry.second, resp->Record, State->ClusterInfo->GetTimestamp()); + } + } + Send(new IEventHandle(event->Sender, TActorId(), resp.Release())); + return TTestActorRuntime::EEventAction::PROCESS; + } + + return prevObserver(runtime, event); + }); State = new TCmsState; MockClusterInfo(State->ClusterInfo); + State->CmsActorId = GetSender(); Sentinel = Register(CreateSentinel(State)); EnableScheduleForActor(Sentinel, true); diff --git a/ydb/core/cms/ui/index.html b/ydb/core/cms/ui/index.html index a41cb5de4a1e..5563dca3375d 100644 --- a/ydb/core/cms/ui/index.html +++ b/ydb/core/cms/ui/index.html @@ -9,7 +9,9 @@ + + @@ -26,12 +28,6 @@ .narrow-line70 
{line-height: 70%} .narrow-line80 {line-height: 80%} .narrow-line90 {line-height: 90%} - pre {outline: 1px solid #ccc; padding: 5px; margin: 5px; } - .string { color: green; } - .number { color: darkorange; } - .boolean { color: blue; } - .null { color: magenta; } - .key { color: red; } @@ -53,7 +49,12 @@ CMS Log
@@ -92,9 +93,26 @@
-
-          Loading...
-          
+
+
+
+
+
+ + + + + + +
+
+ Show nodes: + + +
+
+
+
diff --git a/ydb/core/cms/ui/nanotable.js b/ydb/core/cms/ui/nanotable.js new file mode 100644 index 000000000000..337e8fb4f85e --- /dev/null +++ b/ydb/core/cms/ui/nanotable.js @@ -0,0 +1,203 @@ +class Cell { + constructor(text, onUpdate) { + this.elem = undefined; + this.text = text; + this.header = false; + this.onUpdate = onUpdate; + this.colspan = 1; + this.rowspan = 1; + } + + setRowspan(rowspan) { + this.rowspan = rowspan; + this._render(); + } + + setColspan(colspan) { + this.colspan = colspan; + this._render(); + } + + setHeader(isHeader) { + this.header = isHeader; + var el = this.elem; + var newone = this.header ? $("") : $(""); + newone.addClass(el.attr("class")); + el.before(newone); + el.remove(); + this.elem = newone; + this._render(); + } + + setElem(elem) { + if (this.elem != undefined) { + elem.addClass(this.elem.attr("class")); + } + this.elem = elem; + this._render(); + } + + setText(text, silent = false) { + this.text = text; + this._render(); + if (!silent) { + this.onUpdate(this); + } + } + + _render() { + if (this.elem) { + this.elem.attr("colspan", this.colspan); + this.elem.attr("rowspan", this.rowspan); + this.elem.text(this.text); + } + } + + isProxy() { + return false; + } +} + +class ProxyCell { + constructor(cell) { + this.cell = cell; + } + + isProxy() { + return true; + } +} + +class Table { + constructor(elem, onCellUpdate, onInsertColumn) { + this.elem = elem; + this.onInsertColumn = onInsertColumn; + this.rows = []; + this.onCellUpdate = onCellUpdate; + } + + addRow(columns) { + var cells = []; + for (var column in columns) { + var cell = new Cell(columns[column], this.onCellUpdate); + if (this.onInsertColumnt !== undefined) { + onInsertColumn(cell, column); + } + cells.push(cell); + } + this.rows.push(cells); + this._drawRow(this.rows.length - 1); + return this.rows[this.rows.length - 1]; + } + + removeRow(rowId) { + this.elem.children().eq(rowId).remove(); + var row = this.rows[rowId]; + this.rows.splice(rowId, 1); + for (var 
cell of row) { + if (cell.isProxy()) { + cell.cell.setRowspan(cell.cell.rowspan - 1) + } + } + } + + removeRowByElem(cell) { + var index = cell.elem.parent().index(); + return this.removeRow(index); + } + + insertRow(rowId, columns) { + var cells = []; + var ignoreColspan = 0; + for (var column in columns) { + if ( + this.rows[rowId] === undefined || + !this.rows[rowId][column].isProxy() + ) { + var cell = new Cell(columns[column], this.onCellUpdate); + if (this.onInsertColumnt !== undefined) { + this.onInsertColumn(cell, column); + } + cells.push(cell); + } else { + var spanCell = this.at(rowId, column); + if (ignoreColspan === 0) { + ignoreColspan = spanCell.colspan; + spanCell.setRowspan(spanCell.rowspan + 1); + } else { + --ignoreColspan; + } + cells.push(new ProxyCell(spanCell)); + } + } + this.rows.splice(rowId, 0, cells); + this._drawRow(rowId); + return this.rows[rowId]; + } + + insertRowAfter(cell, columns) { + var index = cell.elem.parent().index() + cell.rowspan; + return this.insertRow(index, columns); + } + + merge(rowStart, rowEnd, colStart, colEnd) { + var cell = this.at(rowStart, colStart); + var newColspan = colEnd - colStart + 1; + var newRowspan = rowEnd - rowStart + 1; + if (cell.colspan < newColspan) { + cell.setColspan(newColspan); + } + if (cell.rowspan < newRowspan) { + cell.setRowspan(rowEnd - rowStart + 1); + } + for (let i = rowStart; i <= rowEnd; i++) { + for (let j = colStart; j <= colEnd; j++) { + if (i !== rowStart || j !== colStart) { + this.rows[i][j] = new ProxyCell(cell); + } + } + this._redrawRow(i); + } + } + + mergeCells(from, to) { + var fromRow = from.elem.parent().index(); + var toRow = to.elem.parent().index(); + this.merge(fromRow, toRow, 0, 0); //TODO: support cells + } + + at(row, col) { + var cell = this.rows[row][col]; + return cell.isProxy() ? 
cell.cell : cell; + } + + _drawRow(row) { + var rowElem = $(""); + if (row >= 1) { + var after = this.elem.children().eq(row - 1); + rowElem.insertAfter(after); + } else { + this.elem.prepend(rowElem); + } + for (var cell in this.rows[row]) { + if (!this.rows[row][cell].isProxy()) { + cell = this.at(row, cell); + var cellElem = cell.header ? $("") : $(""); + cell.setElem(cellElem); + rowElem.append(cellElem); + } + } + } + + _redrawRow(row) { + this.elem.children().eq(row).remove(); + this._drawRow(row); + } + + _redraw() { + this.elem.empty(); + for (var row in this.rows) { + this._drawRow(row); + } + } +} diff --git a/ydb/core/cms/ui/sentinel.css b/ydb/core/cms/ui/sentinel.css new file mode 100644 index 000000000000..cf9b7047d80b --- /dev/null +++ b/ydb/core/cms/ui/sentinel.css @@ -0,0 +1,94 @@ +#sentinel-state table, +#sentinel-state th, +#sentinel-state td { + font: 12px/18px Arial, Sans-serif; + border: #cdcdcd 1px solid; +} + +.sentinel-checkbox { + display: inline-block; + padding-right: 8px; + padding-top: 4px; +} + +.sentinel-checkbox > label { + padding: 0 4px; +} + +#sentinel-state > form { + margin-bottom: 0; +} + +#sentinel-state th { + background-color: #99bfe6; + font-weight: bold; +} + +#sentinel-state .red { + font-weight: bold; + color: red; +} + +#sentinel-state .yellow { + font-weight: bold; + color: yellow; +} + +#sentinel-state .green { + font-weight: bold; + color: green; +} + +#sentinel-loader { + display: none; +} + +.active > #sentinel-loader { + display: inline-block; +} + +#sentinel-state table { + width: 100%; + margin-bottom: 16px; +} + +#sentinel-state .side { + width: 22px; + writing-mode: vertical-lr; +} + +circle { + fill: transparent; + stroke: #5555ff; + stroke-width: 10px; + stroke-dasharray: 33; + stroke-dashoffset: 0; + transform: rotate(-90deg); + transform-origin: center; +} + +#sentinel-state .highlight { + animation: highlight 5s linear; +} + +@keyframes highlight { + 0% { + background: green; + } + 100% { + background: 
none; + } +} + +#sentinel-loader .anim { + animation: clock-animation 5s linear; +} + +@keyframes clock-animation { + 0% { + stroke-dashoffset: 33; + } + 100% { + stroke-dashoffset: 0; + } +} diff --git a/ydb/core/cms/ui/sentinel_state.js b/ydb/core/cms/ui/sentinel_state.js index 57b56365e090..d151441b0599 100644 --- a/ydb/core/cms/ui/sentinel_state.js +++ b/ydb/core/cms/ui/sentinel_state.js @@ -1,41 +1,424 @@ 'use strict'; -var CmsSentinelState = { - fetchInterval: 5000, -}; - -function syntaxHighlight(json) { - if (typeof json != 'string') { - json = JSON.stringify(json, undefined, 4); - } - json = json.replace(/&/g, '&').replace(//g, '>'); - return json.replace(/("(\\u[a-zA-Z0-9]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?)/g, function (match) { - var cls = 'number'; - if (/^"/.test(match)) { - if (/:$/.test(match)) { - cls = 'key'; +var TPDiskState = [ + "Initial", + "InitialFormatRead", + "InitialFormatReadError", + "InitialSysLogRead", + "InitialSysLogReadError", + "InitialSysLogParseError", + "InitialCommonLogRead", + "InitialCommonLogReadError", + "InitialCommonLogParseError", + "CommonLoggerInitError", + "Normal", + "OpenFileError", + "ChunkQuotaError", + "DeviceIoError", +]; + +TPDiskState[252] = "Missing"; +TPDiskState[253] = "Timeout"; +TPDiskState[254] = "NodeDisconnected"; +TPDiskState[255] = "Unknown"; + +const EPDiskStatus = [ + "UNKNOWN", + "ACTIVE", + "INACTIVE", + "BROKEN", + "FAULTY", + "TO_BE_REMOVED", +]; + +const PDiskHeaders = [ + "PDiskId", + "State", + "PrevState", + "StateCounter", + "Status", + "ChangingAllowed", + "Touched", + "DesiredStatus", + "StatusChangeAttempts", + "PrevDesiredStatus", + "PrevStatusChangeAttempts", + "LastStatusChange", +]; + +class CmsSentinelState { + + constructor() { + this.fetchInterval = 5000; + this.nodes = {}; + this.pdisks = {}; + this.config = {}; + this.stateUpdater = {}; + this.configUpdater = {}; + this.show = "UNHEALTHY"; + this.range = "1-20"; + this.filtered = 
{}; + this.filteredSize = 0; + this.gen = 0; + + this.initTab(); + } + + buildPVHeader(table, header) { + var headers = [header, ""]; + var row = table.addRow(headers); + row[0].setHeader(true); + table.merge(0, 0, 0, 1); + headers = ["Param", "Value"]; + row = table.addRow(headers); + row[0].setHeader(true); + row[1].setHeader(true); + return row; + } + + addPVEntry(table, header, key, value) { + var data = [key, value]; + var row = table.insertRowAfter(header, data); + return row; + } + + updatePVEntry(table, row, value, prevValue) { + if(value !== prevValue) { + row[1].setText(value); + } + } + + renderPVEntry(entry, newData) { + var table = entry.table; + var headers = entry.header; + var data = entry.data; + for (var entry in newData) { + if (!data.hasOwnProperty(entry)) { + var row = this.addPVEntry(table, headers[0], entry, newData[entry]); + data[entry] = { + row: row, + data: newData[entry], + }; } else { - cls = 'string'; + this.updatePVEntry( + table, + data[entry].row, + newData[entry], + data[entry].data); + data[entry].data = newData[entry]; } - } else if (/true|false/.test(match)) { - cls = 'boolean'; - } else if (/null/.test(match)) { - cls = 'null'; } - return '' + match + ''; - }); -} + } -function onCmsSentinelStateLoaded(data) { - $("#sentinel-state-content").html(syntaxHighlight(data)); - setTimeout(loadCmsSentinelState, CmsSentinelState.fetchInterval); -} + id(arg) { + return { "value": arg === undefined ? "nil" : arg }; + } + + state(highlight, arg) { + var res = { + "value": arg + ":" + TPDiskState[arg] + }; + if (highlight == true) { + res.class = arg == 10 ? "green" : "red" + } + return res; + } + + status(arg) { + return { "value": arg === undefined ? "nil" : arg + ":" + EPDiskStatus[arg], "class": arg === 1 ? "green" : (arg === undefined ? undefined : "red") }; + } + + bool(arg) { + return { "value": arg === true ? 
"+" : "-" }; + } + + getPDiskInfoValueMappers() { + return { + "State": function(arg) { return this.state(true, arg); }.bind(this), + "PrevState": function(arg) { return this.state(false, arg); }.bind(this), + "StateCounter": this.id.bind(this), + "Status": this.status.bind(this), + "ChangingAllowed": this.bool.bind(this), + "Touched": this.bool.bind(this), + "DesiredStatus": this.status.bind(this), + "StatusChangeAttempts": this.id.bind(this), + "PrevDesiredStatus": this.id.bind(this), + "PrevStatusChangeAttempts": this.id.bind(this), + "LastStatusChange": this.id.bind(this), + }; + } + + nameToSelector(name) { + return (name.charAt(0).toLowerCase() + name.slice(1)).replace(/([A-Z])/g, "-$1").toLowerCase(); + } + + nameToMember(name) { + return (name.charAt(0).toLowerCase() + name.slice(1)); + } + + restartAnimation(node) { + var el = node; + var newone = el.clone(true); + el.before(newone); + el.remove() + } + + mapPDiskState(cell, key, text, silent = false) { + if (this.getPDiskInfoValueMappers().hasOwnProperty(key)) { + var data = this.getPDiskInfoValueMappers()[key](text); + cell.setText( + data.value, + silent + ); + if (data.hasOwnProperty("class")) { + cell.elem.removeClass("red"); + cell.elem.removeClass("yellow"); + cell.elem.removeClass("green"); + cell.elem.addClass(data.class); + } + } + } + + buildPDisksTableHeader(table, width) { + var headers = ["Node", "PDisk"]; + for (var i = headers.length; i < width; ++i) { + headers.push(""); + } + var row = table.addRow(headers); + row[0].setHeader(true); + row[1].setHeader(true); + table.merge(0, 0, 1, width); + } + + columnFilter(el) { + if (this.filtered.hasOwnProperty(el) && this.filtered[el]) { + return false; + } + return true; + } + + buildNodeHeader(table, NodeId) { + var headers = [NodeId].concat(PDiskHeaders).filter(this.columnFilter.bind(this)); + var row = table.addRow(headers); + row[0].elem.addClass("side"); + for (var i = 1; i < row.length; ++i) { + row[i].setHeader(true); + } + return row; + } 
+ + buildPDisk(table, header, id, diskData) { + diskData["PDiskId"] = id; + var deletionMarker = "DELETED"; + var data = [""].concat(PDiskHeaders.map((x) => this.columnFilter(x) ? diskData[x] : deletionMarker)).filter((x) => x !== deletionMarker); + var row = table.insertRowAfter(header[0], data); + var filteredHeaders = PDiskHeaders.filter(this.columnFilter.bind(this)); + for (var i = 2; i < row.length; ++i) { + var key = filteredHeaders[i - 1]; + this.mapPDiskState(row[i], key, diskData[key], true); + } + return row; + } + + updatePDisk(table, row, id, data, prevData) { + for (var i = 2; i < row.length; ++i) { + var key = PDiskHeaders[i - 1]; + if (data[key] !== prevData[key]) { + this.mapPDiskState(row[i], key, data[key]); + } + } + } + + removeOutdated() { + for (var node in this.nodes) { + for (var pdisk in this.nodes[node].pdisks) { + if (this.nodes[node].pdisks[pdisk].gen != this.gen) { + this.pdisksTable.removeRowByElem(this.nodes[node].pdisks[pdisk].row[1]); // first because zero is a proxy + delete this.nodes[node].pdisks[pdisk]; + } + } + if (Object.keys(this.nodes[node].pdisks).length === 0) { + this.pdisksTable.removeRowByElem(this.nodes[node].header[0]); + delete this.nodes[node]; + } + } + } + + renderPDisksTable(data) { + var table = this.pdisksTable; + + this.gen++; + var currentGen = this.gen; + + for (var ipdisk in data["PDisks"]) { + var pdisk = data["PDisks"][ipdisk]; + var NodeId = pdisk["Id"]["NodeId"]; + var PDiskId = pdisk["Id"]["DiskId"]; + var PDiskInfo = pdisk["Info"]; + if (!this.nodes.hasOwnProperty(NodeId)) { + var row = this.buildNodeHeader(table, NodeId); + this.nodes[NodeId] = { + header: row, + pdisks: {} + }; + } + if (!this.nodes[NodeId].pdisks.hasOwnProperty(PDiskId)) { + var header = this.nodes[NodeId].header; + var row = this.buildPDisk(table, header, PDiskId, PDiskInfo); + + table.mergeCells(header[0], row[0]); + + this.nodes[NodeId].pdisks[PDiskId] = { + row: row, + data: PDiskInfo, + gen: currentGen, + }; + } else { + var 
prevState = this.nodes[NodeId].pdisks[PDiskId]; + this.updatePDisk(table, prevState.row, PDiskId, PDiskInfo, prevState.data); + prevState.data = PDiskInfo; + prevState.gen = currentGen; + } + } + + this.removeOutdated(); + } + + onThisLoaded(data) { + if (data?.Status?.Code === "OK") { + $("#sentinel-error").empty(); + + this.renderPDisksTable(data); + + this.renderPVEntry(this.config, data["SentinelConfig"]); + + var flattenStateUpdaterResp = data["StateUpdater"]["UpdaterInfo"]; + flattenStateUpdaterResp["WaitNodes"] = data["StateUpdater"]["WaitNodes"]; + for (var key in data["StateUpdater"]["PrevUpdaterInfo"]) { + flattenStateUpdaterResp["Prev" + key] = data["StateUpdater"]["PrevUpdaterInfo"][key]; + } + this.renderPVEntry(this.stateUpdater, flattenStateUpdaterResp); + + var flattenConfigUpdaterResp = data["ConfigUpdater"]["UpdaterInfo"]; + flattenConfigUpdaterResp["BSCAttempt"] = data["ConfigUpdater"]["BSCAttempt"]; + flattenConfigUpdaterResp["CMSAttempt"] = data["ConfigUpdater"]["CMSAttempt"]; + flattenConfigUpdaterResp["PrevBSCAttempt"] = data["ConfigUpdater"]["PrevBSCAttempt"]; + flattenConfigUpdaterResp["PrevCMSAttempt"] = data["ConfigUpdater"]["PrevCMSAttempt"]; + for (var key in data["StateUpdater"]["PrevUpdaterInfo"]) { + flattenConfigUpdaterResp["Prev" + key] = data["ConfigUpdater"]["PrevUpdaterInfo"][key]; + } + this.renderPVEntry(this.configUpdater, flattenConfigUpdaterResp); + } else { + $("#sentinel-error").text("Error while updating state"); + } + setTimeout(this.loadThis.bind(this), this.fetchInterval); + this.restartAnimation($("#sentinel-anim")); + } + + loadThis() { + var show = $('input[name="sentinel-switch"]:checked').val(); -function loadCmsSentinelState() { - var url = 'cms/api/json/sentinel'; - $.get(url).done(onCmsSentinelStateLoaded); + if (show != this.show) { + this.cleanup(); + } + + this.show = show; + var url = 'cms/api/json/sentinel?show=' + this.show; + if (this.range != "") { + url = url + "&range=" + this.range; + } + 
$.get(url).done(this.onThisLoaded.bind(this)); + } + + onCellUpdate(cell) { + cell.elem.addClass("highlight"); + var el = cell.elem; + var newone = el.clone(true); + el.before(newone); + el.remove(); + cell.elem = newone; + } + + onInsertColumn(cell, columnId) { + cell.onUpdate = cell.onUpdate; + } + + preparePVTable(name) { + var table = new Table($("#sentinel-" + this.nameToSelector(name)), this.onCellUpdate); + var header = this.buildPVHeader(table, name); + this[this.nameToMember(name)] = { + header: header, + table: table, + data: {}, + }; + } + + refreshRange() { + var value = $("#sentinel-range").val(); + const re = /^(?:(?:\d+|(?:\d+-\d+)),)*(?:\d+|(?:\d+-\d+))$/; + if (re.test(value)) { + $("#sentinel-range-error").empty(); + this.range = value; + this.cleanup(); + } else { + $("#sentinel-range-error").text("Invalid range"); + } + } + + addCheckbox(elem, name) { + var cb = $('', { type: 'checkbox', id: 'cb-' + name, value: name, checked: 'checked' }); + + cb.change(function() { + if(cb[0].checked) { + this.filtered[name] = false; + this.filteredSize--; + } else { + this.filtered[name] = true; + this.filteredSize++; + } + this.cleanup(); + }.bind(this)).appendTo(elem); + $('