diff --git a/.env b/.env new file mode 100644 index 00000000..b2e83160 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +COMPOSE_FILE=docker-compose.yml:docker-compose.observability.yml diff --git a/.gitignore b/.gitignore index 577f001d..63da1157 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +/.env /build /dist /enduro diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml new file mode 100644 index 00000000..d59c5148 --- /dev/null +++ b/docker-compose.observability.yml @@ -0,0 +1,36 @@ +services: + + grafana: + image: grafana/grafana:10.0.3 + volumes: + - ./hack/etc/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_AUTH_DISABLE_LOGIN_FORM=true + - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor + ports: + - "127.0.0.1:3000:3000" + + grafana-tempo: + image: "grafana/tempo:latest" + entrypoint: + - "/tempo" + command: + - "-config.file=/etc/grafana-tempo.yaml" + volumes: + - "./hack/etc/grafana-tempo.yaml:/etc/grafana-tempo.yaml" + - "./hack/stuff/tempo-data:/tmp/tempo" + ports: + - "127.0.0.1:4317:4317" + + grafana-agent: + image: "grafana/agent:latest" + entrypoint: + - "/bin/grafana-agent" + command: + - "-config.file=/etc/grafana-agent.yaml" + volumes: + - "./hack/etc/grafana-agent.yaml:/etc/grafana-agent.yaml" + ports: + - "127.0.0.1:12345:4317" diff --git a/enduro.toml b/enduro.toml index d9fc4dec..8510fb8f 100644 --- a/enduro.toml +++ b/enduro.toml @@ -4,6 +4,11 @@ verbosity = 2 debug = true debugListen = "127.0.0.1:9001" +[telemetry.traces] +enabled = true +address = "127.0.0.1:12345" +ratio = 1.0 + [temporal] namespace = "default" address = "127.0.0.1:7233" diff --git a/go.mod b/go.mod index 24222fb7..67ad5b45 100644 --- a/go.mod +++ b/go.mod @@ -29,12 +29,18 @@ require ( github.com/spf13/viper v1.16.0 github.com/stretchr/testify v1.8.4 go.artefactual.dev/tools v0.3.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0 + go.opentelemetry.io/otel v1.16.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.16.0 + go.opentelemetry.io/otel/sdk v1.16.0 + go.opentelemetry.io/otel/trace v1.16.0 go.temporal.io/api v1.23.0 go.temporal.io/sdk v1.23.1 goa.design/goa/v3 v3.12.1 goa.design/plugins/v3 v3.12.1 gocloud.dev v0.30.0 golang.org/x/sync v0.3.0 + google.golang.org/grpc v1.56.1 gotest.tools/v3 v3.5.0 ) @@ -68,6 +74,8 @@ require ( github.com/dimfeld/httptreemux/v5 v5.5.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect + github.com/felixge/httpsnoop v1.0.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.2.4 // indirect github.com/gogo/googleapis v1.4.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -79,6 +87,7 @@ require ( github.com/google/wire v0.5.0 // indirect github.com/googleapis/gax-go/v2 v2.11.0 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.15.2 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect @@ -107,6 +116,10 @@ require ( github.com/yuin/gopher-lua v1.1.0 // indirect github.com/zach-klippenstein/goregen v0.0.0-20160303162051-795b5e3961ea // indirect go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.16.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.16.0 // indirect + go.opentelemetry.io/otel/metric v1.16.0 // indirect + go.opentelemetry.io/proto/otlp v0.19.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.24.0 // indirect @@ -119,8 +132,9 @@ require ( golang.org/x/tools v0.10.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/api v0.128.0 // indirect + google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230530153820-e85fd2cbaebc // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230629202037-9506855d4529 // indirect - google.golang.org/grpc v1.56.1 // indirect google.golang.org/protobuf v1.31.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index e16ddd19..cf5deacd 100644 --- a/go.sum +++ b/go.sum @@ -1123,6 +1123,7 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF github.com/fatih/color v1.14.1/go.mod h1:2oHN61fhTpgcxD3TSWCgKDiH1+x4OiDVVGH8WlgGZGg= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.2/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/flowstack/go-jsonschema v0.1.1/go.mod h1:yL7fNggx1o8rm9RlgXv7hTBWxdBM0rVwpMwimd3F3N0= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= @@ -1183,6 +1184,7 @@ github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbV github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/stdr v1.2.0/go.mod h1:YkVgnZu1ZjjL7xTxrfm/LLZBfkhTqSR1ydtm6jTKKwI= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= @@ -1316,6 +1318,7 @@ github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EO github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= +github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -1496,6 +1499,7 @@ github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3/go.mod h1:o//XUCC/F+yRGJoPO/VU0GSB0f8Nhgmxx0VIRUvaC0w= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.15.2 h1:gDLXvp5S9izjldquuoAhDzccbskOL6tDC5jMSyx3zxE= github.com/grpc-ecosystem/grpc-gateway/v2 v2.15.2/go.mod h1:7pdNwVWBBHGiCxa9lAszqCJMbfTISJ7oMftp8+UGV08= github.com/hanwen/go-fuse/v2 v2.3.0/go.mod h1:xKwi1cF7nXAOBCXujD5ie0ZKsxc8GGSA1rlMJc+8IJs= github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= @@ -2269,34 +2273,48 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.2 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.31.0/go.mod h1:PFmBsWbldL1kiWZk9+0LBZz2brhByaGsvp6pRICMlPE= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.40.0/go.mod h1:pcQ3MM3SWvrA71U4GDqv9UFDJ3HQsW7y5ZO3tDTlUdI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0 h1:pginetY7+onl4qN1vl0xW/V/v6OBZ0vVdH+esuJgvmM= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0/go.mod h1:XiYsayHc36K3EByOO6nbAXnAWbrUxdjUROCEeeROOH8= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= go.opentelemetry.io/otel v1.3.0/go.mod h1:PWIKzi6JCp7sM0k9yZ43VX+T345uNbAkDKwHVjb2PTs= go.opentelemetry.io/otel v1.6.0/go.mod h1:bfJD2DZVw0LBxghOTlgnlI0CV3hLDu9XF/QKOUXMTQQ= go.opentelemetry.io/otel v1.6.1/go.mod h1:blzUabWHkX6LJewxvadmzafgh/wnvBSDBdOuwkAtrWQ= go.opentelemetry.io/otel v1.11.1/go.mod h1:1nNhXBbWSD0nsL38H6btgnFN2k4i0sNLHNNMZMSbUGE= go.opentelemetry.io/otel v1.14.0/go.mod h1:o4buv+dJzx8rohcUeRmWUZhqupFvzWis188WlggnNeU= +go.opentelemetry.io/otel v1.16.0 h1:Z7GVAX/UkAXPKsy94IU+i6thsQS4nb7LviLpnaNeW8s= +go.opentelemetry.io/otel v1.16.0/go.mod h1:vl0h9NUa1D5s1nv3A5vZOYWn8av4K8Ml6JDeHrT/bx4= go.opentelemetry.io/otel/exporters/otlp v0.20.0/go.mod h1:YIieizyaN77rtLJra0buKiNBOm9XQfkPEKBeuhoMwAM= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.3.0/go.mod h1:VpP4/RMn8bv8gNo9uK7/IMY4mtWLELsS+JIP0inH0h4= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.6.1/go.mod h1:NEu79Xo32iVb+0gVNV8PMd7GoWqnyDXRlj04yFjqz40= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.14.0/go.mod h1:UFG7EBMRdXyFstOwH028U0sVf+AvukSGhF0g8+dmNG8= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.16.0 h1:t4ZwRPU+emrcvM2e9DHd0Fsf0JTPVcbfa/BhTDF03d0= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.16.0/go.mod h1:vLarbg68dH2Wa77g71zmKQqlQ8+8Rq3GRG31uc0WcWI= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.3.0/go.mod h1:hO1KLR7jcKaDDKDkvI9dP/FIhpmna5lkqPUQdEjFAM8= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.6.1/go.mod h1:YJ/JbY5ag/tSQFXzH3mtDmHqzF3aFn3DI/aB1n7pt4w= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.14.0/go.mod h1:HrbCVv40OOLTABmOn1ZWty6CHXkU8DK/Urc43tHug70= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.16.0 h1:cbsD4cUcviQGXdw8+bo5x2wazq10SKz8hEbtCRPcU78= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.16.0/go.mod h1:JgXSGah17croqhJfhByOLVY719k1emAXC8MVhCIJlRs= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.3.0/go.mod h1:keUU7UfnwWTWpJ+FWnyqmogPa82nuU5VUANFq49hlMY= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.6.1/go.mod h1:UJJXJj0rltNIemDMwkOJyggsvyMG9QHfJeFH0HS5JjM= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.14.0/go.mod h1:5w41DY6S9gZrbjuq6Y+753e96WfPha5IcsOSZTtullM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.16.0 h1:TVQp/bboR4mhZSav+MdgXB8FaRho1RC8UwVn3T0vjVc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.16.0/go.mod h1:I33vtIe0sR96wfrUcilIzLoA3mLHhRmz9S9Te0S3gDo= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.3.0/go.mod h1:QNX1aly8ehqqX1LEa6YniTU7VY9I6R3X/oPxhGdTceE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.6.1/go.mod h1:DAKwdo06hFLc0U88O10x4xnb5sc7dDRDqRuiN+io8JE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.14.0/go.mod h1:+N7zNjIJv4K+DeX67XXET0P+eIciESgaFDBqh+ZJFS4= go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= go.opentelemetry.io/otel/metric v0.28.0/go.mod h1:TrzsfQAmQaB1PDcdhBauLMk7nyyg9hm+GoQq/ekE9Iw= go.opentelemetry.io/otel/metric v0.37.0/go.mod h1:DmdaHfGt54iV6UKxsV9slj2bBRJcKC1B1uvDLIioc1s= +go.opentelemetry.io/otel/metric v1.16.0 h1:RbrpwVG1Hfv85LgnZ7+txXioPDoh6EdbZHo26Q3hqOo= +go.opentelemetry.io/otel/metric v1.16.0/go.mod h1:QE47cpOmkwipPiefDwo2wDzwJrlfxxNYodqc4xnGCo4= go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= go.opentelemetry.io/otel/sdk v0.20.0/go.mod h1:g/IcepuwNsoiX5Byy2nNV0ySUF1em498m7hBWC279Yc= go.opentelemetry.io/otel/sdk v1.3.0/go.mod h1:rIo4suHNhQwBIPg9axF8V9CA72Wz2mKF1teNrup8yzs= go.opentelemetry.io/otel/sdk v1.6.1/go.mod h1:IVYrddmFZ+eJqu2k38qD3WezFR2pymCzm8tdxyh3R4E= go.opentelemetry.io/otel/sdk v1.11.1/go.mod h1:/l3FE4SupHJ12TduVjUkZtlfFqDCQJlOlithYrdktys= go.opentelemetry.io/otel/sdk v1.14.0/go.mod h1:bwIC5TjrNG6QDCHNWvW4HLHtUQ4I+VQDsnjhvyZCALM= +go.opentelemetry.io/otel/sdk v1.16.0 h1:Z1Ok1YsijYL0CSJpHt4cS3wDDh7p572grzNrBMiMWgE= +go.opentelemetry.io/otel/sdk v1.16.0/go.mod h1:tMsIuKXuuIWPBAOrH+eHtvhTL+SntFtXF9QD68aP6p4= go.opentelemetry.io/otel/sdk/export/metric v0.20.0/go.mod h1:h7RBNMsDJ5pmI1zExLi+bJK+Dr8NQCh0qGhm1KDnNlE= go.opentelemetry.io/otel/sdk/metric v0.20.0/go.mod h1:knxiS8Xd4E/N+ZqKmUPf3gTTZ4/0TjTXukfxjzSTpHE= go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= @@ -2305,10 +2323,13 @@ go.opentelemetry.io/otel/trace v1.6.0/go.mod h1:qs7BrU5cZ8dXQHBGxHMOxwME/27YH2qE go.opentelemetry.io/otel/trace v1.6.1/go.mod h1:RkFRM1m0puWIq10oxImnGEduNBzxiN7TXluRBtE+5j0= go.opentelemetry.io/otel/trace v1.11.1/go.mod h1:f/Q9G7vzk5u91PhbmKbg1Qn0rzH1LJ4vbPHFGkTPtOk= go.opentelemetry.io/otel/trace v1.14.0/go.mod h1:8avnQLK+CG77yNLUae4ea2JDQ6iT+gozhnZjy/rw9G8= +go.opentelemetry.io/otel/trace v1.16.0 h1:8JRpaObFoW0pxuVPapkgH8UhHQj+bJW8jJsCZEu5MQs= +go.opentelemetry.io/otel/trace v1.16.0/go.mod h1:Yt9vYq1SdNz3xdjZZK7wcXv1qv2pwLkqr2QVwea0ef0= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.11.0/go.mod h1:QpEjXPrNQzrFDZgoTo49dgHR9RYRSrg3NAKnUGl9YpQ= go.opentelemetry.io/proto/otlp v0.12.1/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.opentelemetry.io/proto/otlp v0.15.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= +go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.temporal.io/api v1.21.0/go.mod h1:xlsUEakkN2vU2/WV7e5NqMG4N93nfuNfvbXdaXUpU8w= go.temporal.io/api v1.23.0 h1:4y9mTQjEHsE0Du0WJ2ExJUcP/1/a+B/UefzIDm4ALTE= diff --git a/hack/etc/grafana-agent.yaml b/hack/etc/grafana-agent.yaml new file mode 100644 index 00000000..23b7f03c --- /dev/null +++ b/hack/etc/grafana-agent.yaml @@ -0,0 +1,22 @@ +server: + log_level: "info" + +traces: + configs: + - name: "default" + receivers: + otlp: + protocols: + grpc: + endpoint: "0.0.0.0:4317" + remote_write: + - endpoint: "grafana-tempo:4317" + insecure: true + batch: + timeout: "1s" + send_batch_size: 100 + automatic_logging: + backend: "stdout" + spans: true + processes: true + roots: true diff --git a/hack/etc/grafana-datasources.yaml b/hack/etc/grafana-datasources.yaml new file mode 100644 index 00000000..16fe24b3 --- /dev/null +++ b/hack/etc/grafana-datasources.yaml @@ -0,0 +1,16 @@ +apiVersion: 1 + +datasources: +- name: "Tempo" + type: "tempo" + access: "proxy" + orgId: 1 + url: "http://grafana-tempo:3200" + basicAuth: false + isDefault: true + version: 1 + editable: false + apiVersion: 1 + uid: "tempo" + jsonData: + httpMethod: "GET" diff --git a/hack/etc/grafana-tempo.yaml b/hack/etc/grafana-tempo.yaml new file mode 100644 index 00000000..385429c3 --- /dev/null +++ b/hack/etc/grafana-tempo.yaml @@ -0,0 +1,20 @@ +server: + http_listen_port: 3200 + graceful_shutdown_timeout: "1s" + +distributor: + receivers: + otlp: + protocols: + grpc: + log_received_spans: + enabled: true + include_all_attributes: true + +storage: + trace: + backend: "local" + wal: + path: "/tmp/tempo/wal" + local: + path: "/tmp/tempo/blocks" diff --git a/hack/etc/prometheus.yaml b/hack/etc/prometheus.yaml new file mode 100644 index 00000000..fc8e4ad2 --- /dev/null +++ b/hack/etc/prometheus.yaml @@ -0,0 +1,11 @@ +global: + scrape_interval: "15s" + evaluation_interval: "15s" + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + - job_name: "tempo" + static_configs: + - targets: ["tempo:3200"] diff --git a/internal/api/api.go b/internal/api/api.go index f5555dc9..2dea820a 100644 --- a/internal/api/api.go +++ b/internal/api/api.go @@ -19,6 +19,8 @@ import ( "github.com/go-logr/logr" "github.com/gorilla/websocket" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel/trace" goahttp "goa.design/goa/v3/http" goahttpmwr "goa.design/goa/v3/http/middleware" "goa.design/goa/v3/middleware" @@ -37,7 +39,9 @@ import ( ) func HTTPServer( - logger logr.Logger, config *Config, + logger logr.Logger, + tp trace.TracerProvider, + config *Config, pipesvc intpipe.Service, batchsvc intbatch.Service, colsvc intcol.Service, @@ -81,6 +85,7 @@ func HTTPServer( // Global middlewares. var handler http.Handler = mux + handler = otelhttp.NewHandler(handler, "enduro/internal/api", otelhttp.WithTracerProvider(tp)) handler = goahttpmwr.RequestID()(handler) handler = versionHeaderMiddleware(config.AppVersion)(handler) if config.Debug { diff --git a/internal/batch/service.go b/internal/batch/service.go index 510f24a1..b8abff95 100644 --- a/internal/batch/service.go +++ b/internal/batch/service.go @@ -9,6 +9,7 @@ import ( "github.com/go-logr/logr" "go.artefactual.dev/tools/ref" + "go.opentelemetry.io/otel/trace" temporalapi_enums "go.temporal.io/api/enums/v1" temporalapi_serviceerror "go.temporal.io/api/serviceerror" temporalsdk_client "go.temporal.io/sdk/client" @@ -134,7 +135,8 @@ func (s *batchImpl) Hints(ctx context.Context) (*goabatch.BatchHintsResult, erro func (s *batchImpl) InitProcessingWorkflow(ctx context.Context, req *collection.ProcessingWorkflowRequest) error { req.ValidationConfig = validation.Config{} - err := collection.InitProcessingWorkflow(ctx, s.cc, s.taskQueue, req) + tr := trace.NewNoopTracerProvider().Tracer("") + err := collection.InitProcessingWorkflow(ctx, tr, s.cc, s.taskQueue, req) if err != nil { s.logger.Error(err, "Error initializing processing workflow.") } diff --git a/internal/collection/goa.go b/internal/collection/goa.go index 07eb435d..87362d98 100644 --- a/internal/collection/goa.go +++ b/internal/collection/goa.go @@ -13,6 +13,7 @@ import ( "strings" "time" + "go.opentelemetry.io/otel/trace" temporalapi_common "go.temporal.io/api/common/v1" temporalapi_enums "go.temporal.io/api/enums/v1" temporalapi_serviceerror "go.temporal.io/api/serviceerror" @@ -265,7 +266,8 @@ func (w *goaWrapper) Retry(ctx context.Context, payload *goacollection.RetryPayl req.WorkflowID = *goacol.WorkflowID req.CollectionID = goacol.ID - if err := InitProcessingWorkflow(ctx, w.cc, w.taskQueue, req); err != nil { + tr := trace.NewNoopTracerProvider().Tracer("") + if err := InitProcessingWorkflow(ctx, tr, w.cc, w.taskQueue, req); err != nil { return fmt.Errorf("error starting the new workflow instance: %w", err) } diff --git a/internal/collection/workflow.go b/internal/collection/workflow.go index 81593259..0b903fbc 100644 --- a/internal/collection/workflow.go +++ b/internal/collection/workflow.go @@ -6,6 +6,8 @@ import ( "time" "github.com/google/uuid" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" temporalsdk_api_enums "go.temporal.io/api/enums/v1" temporalsdk_client "go.temporal.io/sdk/client" @@ -60,7 +62,10 @@ type ProcessingWorkflowRequest struct { RejectDuplicates bool } -func InitProcessingWorkflow(ctx context.Context, c temporalsdk_client.Client, taskQueue string, req *ProcessingWorkflowRequest) error { +func InitProcessingWorkflow(ctx context.Context, tr trace.Tracer, c temporalsdk_client.Client, taskQueue string, req *ProcessingWorkflowRequest) error { + _, span := tr.Start(ctx, "InitProcessingWorkflow") + defer span.End() + if req.WorkflowID == "" { req.WorkflowID = fmt.Sprintf("processing-workflow-%s", uuid.New().String()) } @@ -74,6 +79,10 @@ func InitProcessingWorkflow(ctx context.Context, c temporalsdk_client.Client, ta WorkflowIDReusePolicy: temporalsdk_api_enums.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, } _, err := c.ExecuteWorkflow(ctx, opts, ProcessingWorkflowName, req) + if err != nil { + span.SetStatus(codes.Error, "ExecuteWorkflow failed") + span.RecordError(err) + } return err } diff --git a/main.go b/main.go index 00432060..6337a780 100644 --- a/main.go +++ b/main.go @@ -13,15 +13,25 @@ import ( "syscall" "time" + "github.com/go-logr/logr" "github.com/oklog/run" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/pflag" "github.com/spf13/viper" "go.artefactual.dev/tools/log" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.17.0" + "go.opentelemetry.io/otel/trace" temporalsdk_activity "go.temporal.io/sdk/activity" temporalsdk_client "go.temporal.io/sdk/client" temporalsdk_worker "go.temporal.io/sdk/worker" temporalsdk_workflow "go.temporal.io/sdk/workflow" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" "github.com/artefactual-labs/enduro/internal/api" "github.com/artefactual-labs/enduro/internal/batch" @@ -92,12 +102,28 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + // Set up tracing. + tp, shutdown, err := initTracerProvider(ctx, logger, config.Telemetry) + if err != nil { + logger.Error(err, "Error creating tracer provider.") + os.Exit(1) + } + defer func() { _ = shutdown(ctx) }() + tracer := tp.Tracer("enduro") + database, err := db.Connect(config.Database.DSN) if err != nil { logger.Error(err, "Database configuration failed.") os.Exit(1) } - _ = database.Ping() + _, span := tracer.Start(ctx, "db-ping") + span.SetAttributes(attribute.String("db.driver", "mysql")) + if err := database.Ping(); err != nil { + span.SetStatus(codes.Error, "ping failed") + span.RecordError(err) + } + span.AddEvent("Connected!") + span.End() temporalClient, err := temporalsdk_client.Dial(temporalsdk_client.Options{ Namespace: config.Temporal.Namespace, @@ -152,7 +178,7 @@ func main() { g.Add( func() error { - srv = api.HTTPServer(logger, &config.API, pipesvc, batchsvc, colsvc) + srv = api.HTTPServer(logger, tp, &config.API, pipesvc, batchsvc, colsvc) return srv.ListenAndServe() }, func(err error) { @@ -166,8 +192,8 @@ func main() { // Watchers, where each watcher is a group actor. { for _, w := range wsvc.Watchers() { + w := w done := make(chan struct{}) - cur := w g.Add( func() error { for { @@ -175,30 +201,36 @@ func main() { case <-done: return nil default: - event, err := cur.Watch(ctx) + event, err := w.Watch(ctx) if err != nil { if !errors.Is(err, watcher.ErrWatchTimeout) { - logger.Error(err, "Error monitoring watcher interface.", "watcher", cur) + logger.Error(err, "Error monitoring watcher interface.", "watcher", w) } continue } + ctx, span := tracer.Start(ctx, "Watcher") + span.SetAttributes( + attribute.String("watcher", event.WatcherName), + attribute.String("bucket", event.Bucket), + attribute.String("key", event.Key), + attribute.Bool("dir", event.IsDir), + ) logger.V(1).Info("Starting new workflow", "watcher", event.WatcherName, "bucket", event.Bucket, "key", event.Key, "dir", event.IsDir) - go func() { - req := collection.ProcessingWorkflowRequest{ - WatcherName: event.WatcherName, - PipelineNames: event.PipelineName, - RetentionPeriod: event.RetentionPeriod, - CompletedDir: event.CompletedDir, - StripTopLevelDir: event.StripTopLevelDir, - RejectDuplicates: event.RejectDuplicates, - Key: event.Key, - IsDir: event.IsDir, - ValidationConfig: config.Validation, - } - if err := collection.InitProcessingWorkflow(ctx, temporalClient, config.Temporal.TaskQueue, &req); err != nil { - logger.Error(err, "Error initializing processing workflow.") - } - }() + req := collection.ProcessingWorkflowRequest{ + WatcherName: event.WatcherName, + PipelineNames: event.PipelineName, + RetentionPeriod: event.RetentionPeriod, + CompletedDir: event.CompletedDir, + StripTopLevelDir: event.StripTopLevelDir, + RejectDuplicates: event.RejectDuplicates, + Key: event.Key, + IsDir: event.IsDir, + ValidationConfig: config.Validation, + } + if err := collection.InitProcessingWorkflow(ctx, tracer, temporalClient, config.Temporal.TaskQueue, &req); err != nil { + logger.Error(err, "Error initializing processing workflow.") + } + span.End() } } }, @@ -346,6 +378,7 @@ type configuration struct { Watcher watcher.Config Pipeline []pipeline.Config Validation validation.Config + Telemetry TelemetryConfig // This is a workaround for client-specific functionality. // Simple mechanism to support an arbitrary number of hooks and parameters. @@ -393,3 +426,60 @@ func readConfig(v *viper.Viper, config *configuration, configFile string) (found return found, nil } + +type TelemetryConfig struct { + Traces struct { + Enabled bool + Address string + SamplingRatio *float64 + } +} + +func initTracerProvider(ctx context.Context, logger logr.Logger, cfg TelemetryConfig) (trace.TracerProvider, func(context.Context) error, error) { + if !cfg.Traces.Enabled || cfg.Traces.Address == "" { + logger.V(1).Info("Tracing system is disabled.", "enabled", cfg.Traces.Enabled, "addr", cfg.Traces.Address) + shutdown := func(context.Context) error { return nil } + return trace.NewNoopTracerProvider(), shutdown, nil + } + + conn, err := grpc.DialContext( + ctx, + cfg.Traces.Address, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithBlock(), + ) + if err != nil { + return nil, nil, fmt.Errorf("can't connect to telemetry data collector: %v", err) + } + + exporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithGRPCConn(conn)) + if err != nil { + return nil, nil, fmt.Errorf("can't create gRPC telemetry data exporter: %v", err) + } + + resource, _ := resource.Merge( + resource.Default(), + resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceName(appName), + semconv.ServiceVersion(version), + ), + ) + + var ratio float64 = 1 + if cfg.Traces.SamplingRatio != nil { + ratio = *cfg.Traces.SamplingRatio + } + sampler := sdktrace.ParentBased(sdktrace.TraceIDRatioBased(ratio)) + + tp := sdktrace.NewTracerProvider( + sdktrace.WithSampler(sampler), + sdktrace.WithResource(resource), + sdktrace.WithBatcher(exporter), + ) + shutdown := func(context.Context) error { return tp.Shutdown(ctx) } + + logger.V(1).Info("Using OTel gRPC tracer provider.", "addr", cfg.Traces.Address, "ratio", ratio) + + return tp, shutdown, nil +} diff --git a/website/content/en/docs/development/environment.md b/website/content/en/docs/development/environment.md index be37e475..9b6cc045 100644 --- a/website/content/en/docs/development/environment.md +++ b/website/content/en/docs/development/environment.md @@ -117,6 +117,49 @@ You can enable it in Visual Studio Code as follows: } ``` +## Enable tracing + +Enable the observability services by editing the root `.env` file as follows: + + COMPOSE_FILE=docker-compose.yml:docker-compose.observability.yml + +Start all services again: + + docker compose up -d + +Three new services should be ready: + +* Grafana (listens on `127.0.0.1:3000`), +* Grafana Agent (listens on `127.0.0.1:12345`), and +* Grafana Tempo (listens on `127.0.0.1:4317`). + +Enable tracing in `enduro.toml`: + +```toml +[telemetry.traces] +enabled = true +address = "127.0.0.1:12345" +ratio = 1.0 +``` + +Run enduro: + + make run + +Things that you can do: + +* Observe that `grafana-agent` logs received traces: + + docker compose logs -f grafana-agent + +* Observe that `grafana-tempo` logs stored traces: + + docker compose logs -f grafana-tempo + +* Use the [Grafana explorer](http://127.0.0.1:3000/explore) to search and + visualize traces. + + [docker-engine]: https://docs.docker.com/engine/install/ [mc]: https://docs.min.io/docs/minio-client-quickstart-guide.html [go]: https://golang.org/doc/install diff --git a/website/content/en/docs/user-manual/configuration.md b/website/content/en/docs/user-manual/configuration.md index 91df7b44..730e36f9 100644 --- a/website/content/en/docs/user-manual/configuration.md +++ b/website/content/en/docs/user-manual/configuration.md @@ -23,10 +23,16 @@ file using the optional argument `--config=example.toml`. Main configuration attributes that do not belong to a specific section. +#### `verbosity` (Int) + +Chattiness of log operations. `0` is the default and best for production. + +E.g.: `0` + #### `debug` (Bool) -When enabled, the application logger will be configured with increased -verbosity and a colored formatter. +When enabled, the application logger will be configured with a human-readable +format and a colored log record formatter. E.g.: `false` @@ -37,6 +43,48 @@ data. E.g.: `"127.0.0.1:9001"` +### `[telemetry]` + +Telemetry configuration details. + +#### `[telemetry.traces]` + +Tracing configuration. + +For example: + +```toml +[telemetry.traces] +enabled = false +address = "127.0.0.1:12345" +ratio = 1.0 +``` + +#### `enabled` (Bool) + +When enabled, traces will be delivered to the tracing data collector. It is +disabled by default. + +E.g.: `false` + +#### `address` (String) + +Address of the OpenTelemetry tracing data collector (gRPC), e.g. Grafana Agent +or OpenTelemetry Collector. + +E.g.: `"127.0.0.1:12345"` + +#### `ratio` (Float64) + +Sampling ratio. A sampling ratio of 0.25 means that, on average, one out of +every four traces will be sampled. The default is to sample every transfer. + +E.g.: `0.25` + +#### `[telemetry.metrics]` + +Not available yet. + ### `[temporal]` Connection details with the Temporal server.