
[DeepRec] Support collecting timelines in Serving. (#16)
shanshanpt authored Feb 17, 2023
1 parent 5017b0f commit b8717f3
Showing 22 changed files with 3,416 additions and 3 deletions.
59 changes: 59 additions & 0 deletions WORKSPACE
@@ -42,6 +42,65 @@ http_archive(
urls = ["https://github.com/nelhage/rules_boost/archive/9f9fb8b2f0213989247c9d5c0e814a8451d18d7f.tar.gz"],
)

http_archive(
name = "aliyun_oss_c_sdk",
build_file = "//third_party/oss_c_sdk:oss_c_sdk.BUILD",
sha256 = "6450d3970578c794b23e9e1645440c6f42f63be3f82383097660db5cf2fba685",
strip_prefix = "aliyun-oss-c-sdk-3.7.0",
urls = [
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/aliyun/aliyun-oss-c-sdk/archive/3.7.0.tar.gz",
],
)

http_archive(
name = "libexpat",
build_file = "//third_party/expat:libexpat.BUILD",
sha256 = "574499cba22a599393e28d99ecfa1e7fc85be7d6651d543045244d5b561cb7ff",
strip_prefix = "libexpat-R_2_2_6/expat",
urls = [
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/libexpat/libexpat/archive/R_2_2_6.tar.gz",
],
)

http_archive(
name = "libapr1",
build_file = "//third_party/apr1:libapr1.BUILD",
sha256 = "1a0909a1146a214a6ab9de28902045461901baab4e0ee43797539ec05b6dbae0",
strip_prefix = "apr-1.6.5",
patches = [
"//third_party/apr1:libapr1.patch",
],
urls = [
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/apache/apr/archive/1.6.5.tar.gz",
],
)

http_archive(
name = "libaprutil1",
build_file = "//third_party/aprutil1:libaprutil1.BUILD",
sha256 = "4c9ae319cedc16890fc2776920e7d529672dda9c3a9a9abd53bd80c2071b39af",
strip_prefix = "apr-util-1.6.1",
patches = [
"//third_party/aprutil1:libaprutil1.patch",
],
urls = [
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/apache/apr-util/archive/1.6.1.tar.gz",
],
)

http_archive(
name = "mxml",
build_file = "//third_party/mxml:mxml.BUILD",
sha256 = "4d850d15cdd4fdb9e82817eb069050d7575059a9a2729c82b23440e4445da199",
strip_prefix = "mxml-2.12",
patches = [
"//third_party/mxml:mxml.patch",
],
urls = [
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/michaelrsweet/mxml/archive/v2.12.tar.gz",
],
)

load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
boost_deps()

1 change: 1 addition & 0 deletions tensorflow_serving/model_servers/BUILD
@@ -337,6 +337,7 @@ cc_library(
"//tensorflow_serving/config:platform_config_proto",
"//tensorflow_serving/core:availability_preserving_policy",
"//tensorflow_serving/servables/tensorflow:session_bundle_config_proto",
"//tensorflow_serving/util:tracer",
] + TENSORFLOW_DEPS + SUPPORTED_TENSORFLOW_OPS,
)

14 changes: 14 additions & 0 deletions tensorflow_serving/model_servers/main.cc
@@ -207,6 +207,20 @@ int main(int argc, char** argv) {
"TensorFlow Lite model from `model.tflite` file in "
"SavedModel directory instead of the TensorFlow model "
"from `saved_model.pb` file."),
tensorflow::Flag("timeline_start_step", &options.timeline_start_step,
"timeline_start_step"),
tensorflow::Flag("timeline_interval_step", &options.timeline_interval_step,
"timeline_interval_step"),
tensorflow::Flag("timeline_trace_count", &options.timeline_trace_count,
"timeline_trace_count"),
tensorflow::Flag("timeline_path", &options.timeline_path,
"timeline_path"),
tensorflow::Flag("oss_endpoint", &options.oss_endpoint,
"oss_endpoint"),
tensorflow::Flag("oss_access_id", &options.oss_access_id,
"oss_access_id"),
tensorflow::Flag("oss_access_key", &options.oss_access_key,
"oss_access_key"),
tensorflow::Flag("use_multi_stream", &options.use_multi_stream,
"Use multi-stream or not in session_group")};

34 changes: 34 additions & 0 deletions tensorflow_serving/model_servers/server.cc
@@ -14,6 +14,7 @@ limitations under the License.
==============================================================================*/

#include "tensorflow_serving/model_servers/server.h"
#include "tensorflow_serving/util/tracer.h"

#include <unistd.h>

@@ -179,6 +180,35 @@ void Server::PollFilesystemAndReloadConfig(const string& config_file_path) {
}

namespace {
void ParseTimelineConfig(const Server::Options& options) {
  auto start_step = options.timeline_start_step;
  auto interval_step = options.timeline_interval_step;
  auto trace_count = options.timeline_trace_count;
  auto path = options.timeline_path;
  if (start_step >= 0 && interval_step > 0
      && trace_count > 0 && !path.empty()) {
    if (path[0] == '/') {
      // Save timelines to a local path.
      Tracer::GetTracer()->SetParams(start_step, interval_step, trace_count, path);
    } else if (path.find("oss://") != std::string::npos) {
      // Save timelines to OSS; endpoint and credentials are required.
      if (options.oss_endpoint == "" ||
          options.oss_access_id == "" ||
          options.oss_access_key == "") {
        LOG(ERROR) << "ERROR: Timeline requires oss_endpoint, oss_access_id, and oss_access_key."
                   << " Timeline will not be collected.";
        return;
      }
      Tracer::GetTracer()->SetParams(start_step,
          interval_step, trace_count, options.oss_endpoint,
          options.oss_access_id, options.oss_access_key, path);
    } else {
      LOG(ERROR) << "ERROR: Only local paths and oss:// paths are supported for timelines."
                 << " Timeline will not be collected.";
    }
  }
}

Status CreatePlatformConfigMap(const Server::Options& server_options,
ServerCore::Options& options) {
const bool use_saved_model = true;
@@ -201,6 +231,8 @@ Status CreatePlatformConfigMap(const Server::Options& server_options,
"server_options.enable_batching to true.");
}

ParseTimelineConfig(server_options);

session_bundle_config.mutable_session_config()
->mutable_gpu_options()
->set_per_process_gpu_memory_fraction(
@@ -260,6 +292,8 @@ Status CreatePlatformConfigMapV2(const Server::Options& server_options,
auto model_session_config =
session_bundle_config.add_model_session_config();

ParseTimelineConfig(server_options);

// session num
model_session_config->set_session_num(
server_options.session_num_per_group);
9 changes: 9 additions & 0 deletions tensorflow_serving/model_servers/server.h
@@ -91,6 +91,15 @@ class Server {
tensorflow::string gpu_ids_list = "";
bool use_multi_stream = false;

// Timeline collection; the defaults below leave it disabled.
tensorflow::int64 timeline_start_step = -1;
tensorflow::int64 timeline_interval_step = -1;
tensorflow::int64 timeline_trace_count = -1;
tensorflow::string timeline_path = "";
tensorflow::string oss_endpoint = "";
tensorflow::string oss_access_id = "";
tensorflow::string oss_access_key = "";

Options();
};

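The defaults above leave timeline collection disabled; the command-line flags added in main.cc do nothing more than populate these fields. As an illustration (the values below are invented; only the field names and the enabling condition come from this commit), a configuration that would pass the ParseTimelineConfig() check in server.cc could look like:

    Server::Options options;
    options.timeline_start_step = 100;               // must be >= 0
    options.timeline_interval_step = 50;             // must be > 0
    options.timeline_trace_count = 10;               // must be > 0
    options.timeline_path = "/tmp/serving_timeline"; // absolute path => local storage
    // For OSS storage, timeline_path would instead start with "oss://", and
    // oss_endpoint, oss_access_id, and oss_access_key must all be non-empty.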
1 change: 1 addition & 0 deletions tensorflow_serving/servables/tensorflow/BUILD
@@ -489,6 +489,7 @@ cc_library(
":util",
"//tensorflow_serving/apis:predict_proto",
"//tensorflow_serving/util:optional",
"//tensorflow_serving/util:tracer",
"@com_google_absl//absl/strings",
"@org_tensorflow//tensorflow/cc/saved_model:signature_constants",
"@org_tensorflow//tensorflow/contrib/session_bundle",
20 changes: 17 additions & 3 deletions tensorflow_serving/servables/tensorflow/predict_util.cc
@@ -14,6 +14,7 @@ limitations under the License.
==============================================================================*/

#include "tensorflow_serving/servables/tensorflow/predict_util.h"
#include "tensorflow_serving/util/tracer.h"

#include <map>
#include <memory>
@@ -32,6 +33,9 @@ limitations under the License.

namespace tensorflow {
namespace serving {

// Branch-prediction hint: the non-tracing path is the common case.
#define likely(x) __builtin_expect(!!(x), 1)

namespace {

Status VerifySignature(const SignatureDef& signature) {
@@ -205,11 +209,21 @@ Status RunPredict(
  TF_RETURN_IF_ERROR(PreProcessPrediction(signature, request, &input_tensors,
                                          &output_tensor_names,
                                          &output_tensor_aliases));
  // Full tracing adds overhead, so only enable it on steps the tracer
  // selects for collection.
  bool trace_timeline = Tracer::GetTracer()->NeedTracing();
  std::vector<Tensor> outputs;
  RunMetadata run_metadata;
  if (likely(!trace_timeline)) {
    TF_RETURN_IF_ERROR(session->Run(run_options, input_tensors,
                                    output_tensor_names, {}, &outputs,
                                    &run_metadata));
  } else {
    // Run with FULL_TRACE so RunMetadata carries step stats, then let the
    // tracer turn them into a timeline.
    RunOptions tmp_run_opt = run_options;
    tmp_run_opt.set_trace_level(tensorflow::RunOptions::FULL_TRACE);
    TF_RETURN_IF_ERROR(session->Run(tmp_run_opt, input_tensors,
                                    output_tensor_names, {}, &outputs,
                                    &run_metadata));
    Tracer::GetTracer()->GenTimeline(run_metadata);
  }

return PostProcessPredictionResult(output_tensor_aliases, outputs, option,
response);
10 changes: 10 additions & 0 deletions tensorflow_serving/util/BUILD
@@ -62,6 +62,16 @@ cc_library(
],
)

cc_library(
name = "tracer",
hdrs = ["tracer.h"],
deps = [
"@aliyun_oss_c_sdk",
"@org_tensorflow//tensorflow/core:protos_all_cc",
"@org_tensorflow//tensorflow/core:framework",
],
)

cc_library(
name = "prometheus_exporter",
srcs = ["prometheus_exporter.cc"],
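The new //tensorflow_serving/util:tracer target above provides tracer.h, whose contents are not rendered on this page. Judging only from the call sites in server.cc and predict_util.cc, the interface is roughly the following; this is a sketch with inferred signatures, not the actual header:

    class Tracer {
     public:
      // Process-wide singleton used by server.cc and predict_util.cc.
      static Tracer* GetTracer();

      // Configure collection with a local destination path.
      void SetParams(tensorflow::int64 start_step, tensorflow::int64 interval_step,
                     tensorflow::int64 trace_count, const tensorflow::string& path);

      // Configure collection with an oss:// destination; endpoint and credentials required.
      void SetParams(tensorflow::int64 start_step, tensorflow::int64 interval_step,
                     tensorflow::int64 trace_count, const tensorflow::string& oss_endpoint,
                     const tensorflow::string& oss_access_id,
                     const tensorflow::string& oss_access_key, const tensorflow::string& path);

      // Whether the current step falls inside the configured tracing schedule.
      bool NeedTracing();

      // Turn one step's RunMetadata into a timeline and write or upload it.
      void GenTimeline(const tensorflow::RunMetadata& run_metadata);
    };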
(Diffs for the remaining changed files are not shown.)
