diff --git a/go.mod b/go.mod
index 48006e83c0..df5b933e26 100644
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,7 @@ require (
github.com/couchbase/go-couchbase v0.1.1
github.com/couchbase/gocb/v2 v2.5.3-0.20220803131303-46b466983d0f
github.com/couchbase/gocbcore/v10 v10.1.5-0.20220809160836-bf53e9527651
+ github.com/couchbase/gocbcore/v9 v9.1.8
github.com/couchbase/gomemcached v0.1.4
github.com/couchbase/goutils v0.1.2
github.com/couchbase/sg-bucket v0.0.0-20220916154817-791744ac79b7
@@ -35,6 +36,7 @@ require (
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8
+ gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/couchbase/gocb.v1 v1.6.7
gopkg.in/couchbase/gocbcore.v7 v7.1.18
gopkg.in/couchbaselabs/gocbconnstr.v1 v1.0.4
@@ -42,11 +44,12 @@ require (
)
require (
+ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
+ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/couchbase/blance v0.1.2 // indirect
github.com/couchbase/cbauth v0.1.1 // indirect
- github.com/couchbase/gocbcore/v9 v9.1.8 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/golang/protobuf v1.5.2 // indirect
diff --git a/go.sum b/go.sum
index c67286ea99..0e36fcf17e 100644
--- a/go.sum
+++ b/go.sum
@@ -35,9 +35,11 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
+github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
@@ -79,20 +81,12 @@ github.com/couchbase/gomemcached v0.1.4 h1:5n5wmr4dBu+X7XteP8QHP5S9inK9MBjNpN9b7
github.com/couchbase/gomemcached v0.1.4/go.mod h1:mxliKQxOv84gQ0bJWbI+w9Wxdpt9HjDvgW9MjCym5Vo=
github.com/couchbase/goutils v0.1.2 h1:gWr8B6XNWPIhfalHNog3qQKfGiYyh4K4VhO3P2o9BCs=
github.com/couchbase/goutils v0.1.2/go.mod h1:h89Ek/tiOxxqjz30nPPlwZdQbdB8BwgnuBxeoUe/ViE=
-github.com/couchbase/sg-bucket v0.0.0-20220725152948-e1112ff01a3d h1:cYrMXK8u0FQEKTes5PkRmbkao1EjESwH+6SHc7rDHsE=
-github.com/couchbase/sg-bucket v0.0.0-20220725152948-e1112ff01a3d/go.mod h1:9XQoB1t+elPP+yEjHGOX3xcC3Z0/qDgOI7h/fc9XjlU=
-github.com/couchbase/sg-bucket v0.0.0-20220824103435-aa28032bc2a3 h1:bPd/j0+YqKAOcekv0INIxjSiheB6Lle7wXAanGPb5Eo=
-github.com/couchbase/sg-bucket v0.0.0-20220824103435-aa28032bc2a3/go.mod h1:9XQoB1t+elPP+yEjHGOX3xcC3Z0/qDgOI7h/fc9XjlU=
github.com/couchbase/sg-bucket v0.0.0-20220916154817-791744ac79b7 h1:0ahmhcMnxhExwEp6J+tEtHLw4kiUPU/Flb9HWmB9N2U=
github.com/couchbase/sg-bucket v0.0.0-20220916154817-791744ac79b7/go.mod h1:9XQoB1t+elPP+yEjHGOX3xcC3Z0/qDgOI7h/fc9XjlU=
github.com/couchbaselabs/go-fleecedelta v0.0.0-20200408160354-2ed3f45fde8f h1:al5DxXEBAUmINnP5dR950gL47424WzncuRpNdg0TWR0=
github.com/couchbaselabs/go-fleecedelta v0.0.0-20200408160354-2ed3f45fde8f/go.mod h1:daOs69VstinwoALl3wwWxjBf1nD4lIe3wwYhKHKDapY=
github.com/couchbaselabs/gocaves/client v0.0.0-20220223122017-22859b310bd2 h1:UlwJ2GWpZQAQCLHyO3xHKcqAjUUcX2w7FKpbxCIUQks=
github.com/couchbaselabs/gocaves/client v0.0.0-20220223122017-22859b310bd2/go.mod h1:AVekAZwIY2stsJOMWLAS/0uA/+qdp7pjO8EHnl61QkY=
-github.com/couchbaselabs/walrus v0.0.0-20220726144228-c44d71d14a7a h1:hiVwAQnRHvo1oTYaOhNmxNg1KB0dTC9Htw/zDj0L0TU=
-github.com/couchbaselabs/walrus v0.0.0-20220726144228-c44d71d14a7a/go.mod h1:C5TylJ1hRbYFgPnc/6gKUUPkLvkt7xDotgApqzd3dbg=
-github.com/couchbaselabs/walrus v0.0.0-20220824104645-4bbd432c7128 h1:jvVNAyzrEyplEP+o43Y7zUYfxMD5m/6N2vf5kswEUZ8=
-github.com/couchbaselabs/walrus v0.0.0-20220824104645-4bbd432c7128/go.mod h1:bg6u+qv616GUzyZng1swDEpgSn1k9FKpV+mxX7jAcAw=
github.com/couchbaselabs/walrus v0.0.0-20220916160453-6f7d5a152116 h1:/+J3rdBCFJieNnyQiFDSUBJnZD2D6Uh/1Dy4PXC+0WQ=
github.com/couchbaselabs/walrus v0.0.0-20220916160453-6f7d5a152116/go.mod h1:1Gy0YiYTNnuS4ThzYwKqz5X8q9qlCjAoqb/E1QbJdps=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -199,7 +193,6 @@ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
-github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM=
github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
@@ -397,10 +390,6 @@ golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.0.0-20220805013720-a33c5aa5df48 h1:N9Vc/rorQUDes6B9CNdIxAn5jODGj2wzfrei2x4wNj4=
-golang.org/x/net v0.0.0-20220805013720-a33c5aa5df48/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
-golang.org/x/net v0.0.0-20220822230855-b0a4917ee28c h1:JVAXQ10yGGVbSyoer5VILysz6YKjdNT2bsvlayjqhes=
-golang.org/x/net v0.0.0-20220822230855-b0a4917ee28c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1 h1:TWZxd/th7FbRSMret2MVQdlI8uT49QEtwZdvJrxjEHU=
golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@@ -596,6 +585,7 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/tools/password_remover.py b/tools/password_remover.py
deleted file mode 100644
index fc788cb43e..0000000000
--- a/tools/password_remover.py
+++ /dev/null
@@ -1,629 +0,0 @@
-"""
-Copyright 2016-Present Couchbase, Inc.
-
-Use of this software is governed by the Business Source License included in
-the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
-file, in accordance with the Business Source License, use of this software will
-be governed by the Apache License, Version 2.0, included in the file
-licenses/APL2.txt.
-"""
-
-"""
-Redacts sensitive data in config files
-
-"""
-
-
-import json
-import traceback
-import unittest
-from urllib.parse import urlparse
-
-
-def is_valid_json(invalid_json):
- """
- Is the given string valid JSON?
- """
- got_exception = True
- try:
- json.loads(invalid_json)
- got_exception = False
- except Exception as e:
- pass
-
- return got_exception is False
-
-
-def tag_userdata_in_server_config(json_text, log_json_parsing_exceptions=True):
- """
- Content postprocessor that tags user data in a config ready for post-process redaction
- """
- try:
- valid_json = convert_to_valid_json(json_text)
-
- # Lower case keys so that "databases" works as a
- # key even if the JSON has "Databases" as a key.
- parsed_json = lower_keys_dict(valid_json)
-
- tag_userdata_in_server_json(parsed_json)
- formatted_json_string = json.dumps(parsed_json, indent=4)
- return formatted_json_string
-
- except Exception as e:
- if log_json_parsing_exceptions:
- print("Exception trying to tag config user data in {0}. Exception: {1}".format(json_text, e))
- traceback.print_exc()
- return '{"Error":"Error in sgcollect_info password_remover.py trying to tag config user data. See logs for details"}'
-
-
-def tag_userdata_in_server_json(config):
- """
- Given a dictionary that contains a full set of configuration values:
- - Tag any sensitive user-data fields with tags.
- """
-
- if "databases" in config:
- dbs = config["databases"]
- for db in dbs:
- tag_userdata_in_db_json(dbs[db])
-
-
-def tag_userdata_in_db_config(json_text, log_json_parsing_exceptions=True):
- """
- Content postprocessor that tags user data in a db config ready for post-process redaction
- """
- try:
- valid_json = convert_to_valid_json(json_text)
-
- # Lower case keys so that "databases" works as a
- # key even if the JSON has "Databases" as a key.
- parsed_json = lower_keys_dict(valid_json)
-
- tag_userdata_in_db_json(parsed_json)
- formatted_json_string = json.dumps(parsed_json, indent=4)
- return formatted_json_string
-
- except Exception as e:
- if log_json_parsing_exceptions:
- print("Exception trying to tag db config user data in {0}. Exception: {1}".format(json_text, e))
- traceback.print_exc()
- return '{"Error":"Error in sgcollect_info password_remover.py trying to tag db config user data. See logs for details"}'
-
-
-def tag_userdata_in_db_json(db):
- """
- Given a dictionary that contains a set of db configuration values:
- - Tag any sensitive user-data fields with tags.
- """
-
- if "username" in db:
- db["username"] = UD(db["username"])
-
- if "users" in db:
- users = db["users"]
- for username in users:
- user = users[username]
- if "name" in user:
- user["name"] = UD(user["name"])
- if "admin_channels" in user:
- admin_channels = user["admin_channels"]
- for i, _ in enumerate(admin_channels):
- admin_channels[i] = UD(admin_channels[i])
- if "admin_roles" in user:
- admin_roles = user["admin_roles"]
- for i, _ in enumerate(admin_roles):
- admin_roles[i] = UD(admin_roles[i])
- # Tag dict keys. Can't be done in the above loop.
- for i, _ in list(users.items()):
- users[UD(i)] = users.pop(i)
-
- if "roles" in db:
- roles = db["roles"]
- for rolename in roles:
- role = roles[rolename]
- if "admin_channels" in role:
- admin_channels = role["admin_channels"]
- for i, _ in enumerate(admin_channels):
- admin_channels[i] = UD(admin_channels[i])
- # Tag dict keys. Can't be done in the above loop.
- for i, _ in list(roles.items()):
- roles[UD(i)] = roles.pop(i)
-
-
-def UD(value):
- """
- Tags the given value with User Data tags.
- """
- return "{0}".format(value)
-
-
-def remove_passwords_from_config(config_fragment):
- """
- Given a dictionary that contains configuration values, recursively walk the dictionary and:
-
- - Replace any fields w/ key "password" with "*****"
- - Replace any fields w/ key "server" with the result of running it through strip_password_from_url()
- """
-
- if not isinstance(config_fragment, dict):
- return
-
- if "server" in config_fragment:
- config_fragment["server"] = strip_password_from_url(config_fragment["server"])
- if "password" in config_fragment:
- config_fragment["password"] = "******"
-
- for key, item in list(config_fragment.items()):
- if isinstance(item, dict):
- remove_passwords_from_config(item)
-
-
-def remove_passwords(json_text, log_json_parsing_exceptions=True):
- """
- Content postprocessor that strips out all of the sensitive passwords
- """
- try:
- valid_json = convert_to_valid_json(json_text)
-
- # Lower case keys so that "databases" works as a
- # key even if the JSON has "Databases" as a key.
- parsed_json = lower_keys_dict(valid_json)
- remove_passwords_from_config(parsed_json)
-
- # Append a trailing \n here to ensure there's adequate separation in sync_gateway.log
- formatted_json_string = json.dumps(parsed_json, indent=4) + "\n"
- return formatted_json_string
-
- except Exception as e:
- if log_json_parsing_exceptions:
- print("Exception trying to remove passwords from {0}. Exception: {1}".format(json_text, e))
- traceback.print_exc()
- return '{"Error":"Error in sgcollect_info password_remover.py trying to remove passwords. See logs for details"}'
-
-
-def lower_keys_dict(json_text):
- """Deserialize the given JSON document to a Python dictionary and
- transform all keys to lower case.
- """
- def iterate(k):
- return lower_level(k) if isinstance(k, dict) else k
-
- def lower(k):
- return k.lower() if isinstance(k, str) else k
-
- def lower_level(kv):
- return dict((lower(k), iterate(v)) for k, v in kv.items())
-
- json_dict = json.loads(json_text)
- return lower_level(json_dict)
-
-
-def pretty_print_json(json_text):
- """
- Content postprocessor that pretty prints JSON.
- Returns original string with a trailing \n (to ensure separation in sync_gateway.log) if formatting fails
- """
- try:
- json_text = json.dumps(json.loads(json_text), indent=4)
- except Exception as e:
- print("Exception trying to parse JSON {0}. Exception: {1}".format(json_text, e))
- return json_text + "\n"
-
-
-def strip_password_from_url(url_string):
- """
- Given a URL string like:
-
- http://bucket-1:foobar@localhost:8091
-
- Strip out the password and return:
-
- http://bucket-1:@localhost:8091
-
- """
-
- parsed_url = urlparse(url_string)
- if parsed_url.username is None and parsed_url.password is None:
- return url_string
-
- new_url = "{0}://{1}:*****@{2}:{3}/{4}".format(
- parsed_url.scheme,
- parsed_url.username,
- parsed_url.hostname,
- parsed_url.port,
- parsed_url.query
- )
- return new_url
-
-
-def escape_json_value(raw_value):
- """
- Escape all invalid json characters like " to produce a valid json value
-
- Before:
-
- function(doc, oldDoc) { if (doc.type == "reject_me") {
-
- After:
-
- function(doc, oldDoc) { if (doc.type == \"reject_me\") {
-
- """
- escaped = raw_value
- escaped = escaped.replace('\\', "\\\\") # Escape any backslashes
- escaped = escaped.replace('"', '\\"') # Escape double quotes
- escaped = escaped.replace("'", "\\'") # Escape single quotes
-
- # TODO: other stuff should be escaped like \n \t and other control characters
- # See http://stackoverflow.com/questions/983451/where-can-i-find-a-list-of-escape-characters-required-for-my-json-ajax-return-ty
-
- return escaped
-
-
-def convert_to_valid_json(invalid_json):
-
- STATE_OUTSIDE_BACKTICK = "STATE_OUTSIDE_BACKTICK"
- STATE_INSIDE_BACKTICK = "STATE_INSIDE_BACKTICK"
- state = STATE_OUTSIDE_BACKTICK
- output = []
- sync_function_buffer = []
-
- try:
- invalid_json = invalid_json.decode('utf-8')
- except (UnicodeDecodeError, AttributeError):
- pass
-
- # Strip newlines
- invalid_json = invalid_json.replace('\n', '')
-
- # Strip tabs
- invalid_json = invalid_json.replace('\t', '')
-
- # read string char by char
- for json_char in invalid_json:
-
- # if non-backtick character:
- if json_char != '`':
-
- # if in OUTSIDE_BACKTICK state
- if state == STATE_OUTSIDE_BACKTICK:
- # append char to output
- output.append(json_char)
-
- # if in INSIDE_BACKTICK state
- elif state == STATE_INSIDE_BACKTICK:
- # append to sync_function_buffer
- sync_function_buffer.append(json_char)
-
- # if backtick character
- elif json_char == '`':
-
- # if in OUTSIDE_BACKTICK state
- if state == STATE_OUTSIDE_BACKTICK:
- # transition to INSIDE_BACKTICK state
- state = STATE_INSIDE_BACKTICK
-
- # if in INSIDE_BACKTICK state
- elif state == STATE_INSIDE_BACKTICK:
- # run sync_function_buffer through escape_json_value()
- sync_function_buffer_str = "".join(sync_function_buffer)
- sync_function_buffer_str = escape_json_value(sync_function_buffer_str)
-
- # append to output
- output.append('"') # append a double quote
- output.append(sync_function_buffer_str)
- output.append('"') # append a double quote
-
- # empty the sync_function_buffer
- sync_function_buffer = []
-
- # transition to OUTSIDE_BACKTICK state
- state = STATE_OUTSIDE_BACKTICK
-
- output_str = "".join(output)
- return output_str
-
-
-class TestStripPasswordsFromUrl(unittest.TestCase):
-
- def basic_test(self):
- url_with_password = "http://bucket-1:foobar@localhost:8091"
- url_no_password = strip_password_from_url(url_with_password)
- assert "foobar" not in url_no_password
- assert "bucket-1" in url_no_password
-
-
-class TestRemovePasswords(unittest.TestCase):
-
- def test_basic(self):
- json_with_passwords = """
- {
- "log": ["*"],
- "databases": {
- "db2": {
- "server": "http://bucket-1:foobar@localhost:8091"
- },
- "db": {
- "server": "http://bucket4:foobar@localhost:8091",
- "bucket":"bucket-1",
- "username":"bucket-1",
- "password":"foobar",
- "users": { "Foo": { "password": "foobar", "disabled": false, "admin_channels": ["*"] } },
- "sync":
- `
- function(doc, oldDoc) {
- if (doc.type == "reject_me") {
- throw({forbidden : "Rejected document"})
- } else if (doc.type == "bar") {
- // add "bar" docs to the "important" channel
- channel("important");
- } else if (doc.type == "secret") {
- if (!doc.owner) {
- throw({forbidden : "Secret documents \ must have an owner field"})
- }
- } else {
- // all other documents just go into all channels listed in the doc["channels"] field
- channel(doc.channels)
- }
- }
- `
- }
- }
- }
- """
- with_passwords_removed = remove_passwords(json_with_passwords)
- assert "foobar" not in with_passwords_removed
-
- def test_alternative_config(self):
-
- sg_config = '{"Interface":":4984","AdminInterface":":4985","Facebook":{"Register":true},"Log":["*"],"Databases":{"todolite":{"server":"http://localhost:8091","pool":"default","bucket":"default","password":"foobar","name":"todolite","sync":"\\nfunction(doc, oldDoc) {\\n // NOTE this function is the same across the iOS, Android, and PhoneGap versions.\\n if (doc.type == \\"task\\") {\\n if (!doc.list_id) {\\n throw({forbidden : \\"Items must have a list_id.\\"});\\n }\\n channel(\\"list-\\"+doc.list_id);\\n } else if (doc.type == \\"list\\" || (doc._deleted \\u0026\\u0026 oldDoc \\u0026\\u0026 oldDoc.type == \\"list\\")) {\\n // Make sure that the owner propery exists:\\n var owner = oldDoc ? oldDoc.owner : doc.owner;\\n if (!owner) {\\n throw({forbidden : \\"List must have an owner.\\"});\\n }\\n\\n // Make sure that only the owner of the list can update the list:\\n if (doc.owner \\u0026\\u0026 owner != doc.owner) {\\n throw({forbidden : \\"Cannot change owner for lists.\\"});\\n }\\n\\n var ownerName = owner.substring(owner.indexOf(\\":\\")+1);\\n requireUser(ownerName);\\n\\n var ch = \\"list-\\"+doc._id;\\n if (!doc._deleted) {\\n channel(ch);\\n }\\n\\n // Grant owner access to the channel:\\n access(ownerName, ch);\\n\\n // Grant shared members access to the channel:\\n var members = !doc._deleted ? doc.members : oldDoc.members;\\n if (Array.isArray(members)) {\\n var memberNames = [];\\n for (var i = members.length - 1; i \\u003e= 0; i--) {\\n memberNames.push(members[i].substring(members[i].indexOf(\\":\\")+1))\\n };\\n access(memberNames, ch);\\n }\\n } else if (doc.type == \\"profile\\") {\\n channel(\\"profiles\\");\\n var user = doc._id.substring(doc._id.indexOf(\\":\\")+1);\\n if (user !== doc.user_id) {\\n throw({forbidden : \\"Profile user_id must match docid.\\"});\\n }\\n requireUser(user);\\n access(user, \\"profiles\\");\\n }\\n}\\n","users":{"GUEST":{"name":"","admin_channels":["*"],"all_channels":null,"disabled":true}}}}}'
-
- with_passwords_removed = remove_passwords(sg_config)
-
- assert "foobar" not in with_passwords_removed
-
- def test_config_fragment(self):
- db_config = """
- {
- "username": "bucket1",
- "name": "db",
- "bucket": "bucket1",
- "server": "http://localhost:8091",
- "password": "foobar",
- "pool": "default"
- }
- """
- with_passwords_removed = remove_passwords(db_config)
- assert "foobar" not in with_passwords_removed
- pass
-
- def test_non_parseable_config(self):
- """
- If a config is not JSON parseable, make sure passwords are not stored in result
- """
- unparseable_json_with_passwords = """
- {
- "log": ["*"],
- "databases": {
- "db2": {
- "server": "http://bucket-1:foobar@localhost:8091"
- },
- "db": {
- "server": "http://localhost:8091",
- "bucket":"bucket-1",
- "username":"bucket-1",
- "password":"foobar",
- "users": { "GUEST": { "disabled": false, "admin_channels": ["*"] } },
- "sync":
-
- function(doc, oldDoc) {
- if (doc.type == "reject_me") {
- throw({forbidden : "Rejected document"})
- } else if (doc.type == "bar") {
- // add "bar" docs to the "important" channel
- channel("important");
- } else if (doc.type == "secret") {
- if (!doc.owner) {
- throw({forbidden : "Secret documents \ must have an owner field"})
- }
- } else {
- // all other documents just go into all channels listed in the doc["channels"] field
- channel(doc.channels)
- }
- }
- `
- }
- }
- }
- """
- with_passwords_removed = remove_passwords(unparseable_json_with_passwords, log_json_parsing_exceptions=False)
- assert "foobar" not in with_passwords_removed
-
-
-class TestTagUserData(unittest.TestCase):
-
- def test_basic(self):
- json_with_userdata = """
- {
- "databases": {
- "db": {
- "server": "http://bucket4:foobar@localhost:8091",
- "bucket":"bucket-1",
- "username":"bucket-user",
- "password":"foobar",
- "users": {
- "FOO": {
- "password": "foobar",
- "disabled": false,
- "admin_channels": ["uber_secret_channel"]
- },
- "bar": { "password": "baz" }
- }
- },
- "db2": { "server": "http://bucket-1:foobar@localhost:8091" }
- }
- }
- """
- tagged = tag_userdata_in_server_config(json_with_userdata)
- assert "uber_secret_channel" in tagged
- assert "foo" in tagged # everything is lower cased
- assert "bucket-user" in tagged
-
- assert "baz" not in tagged # passwords shouldn't be tagged, they get removed
- assert "bucket-1" not in tagged # bucket name is actually metadata
-
-
-class TestConvertToValidJSON(unittest.TestCase):
-
- def basic_test(self):
-
- invalid_json = """
- {
- "log": ["*"],
- "databases": {
- "db": {
- "server": "walrus:",
- "users": { "GUEST": { "disabled": false, "admin_channels": ["*"] } },
- "sync":
- `
- function(doc, oldDoc) {
- if (doc.type == "reject_me") {
- throw({forbidden : "Rejected document"})
- } else if (doc.type == "bar") {
- // add "bar" docs to the "important" channel
- channel("important");
- } else if (doc.type == "secret") {
- if (!doc.owner) {
- throw({forbidden : "Secret documents \ must have an owner field"})
- }
- } else {
- // all other documents just go into all channels listed in the doc["channels"] field
- channel(doc.channels)
- }
- }
- `
- }
- }
- }
- """
-
- valid_json = convert_to_valid_json(invalid_json)
-
- got_exception = True
- try:
- parsed_json = json.loads(valid_json)
- json.dumps(parsed_json, indent=4)
- got_exception = False
- except Exception as e:
- print("Exception: {0}".format(e))
-
- assert got_exception is False, "Failed to convert to valid JSON"
-
- def basic_test_two_sync_functions(self):
-
- invalid_json = """
- {
- "log": ["*"],
- "databases": {
- "db": {
- "server": "walrus:",
- "users": { "GUEST": { "disabled": false, "admin_channels": ["*"] } },
- "sync":
- `
- function(doc, oldDoc) {
- if (doc.type == "reject_me") {
- throw({forbidden : "Rejected document"})
- } else if (doc.type == "bar") {
- // add "bar" docs to the "important" channel
- channel("important");
- } else if (doc.type == "secret") {
- if (!doc.owner) {
- throw({forbidden : "Secret documents \ must have an owner field"})
- }
- } else {
- // all other documents just go into all channels listed in the doc["channels"] field
- channel(doc.channels)
- }
- }
- `
- },
- "db2": {
- "server": "walrus:",
- "users": { "GUEST": { "disabled": false, "admin_channels": ["*"] } },
- "sync":
- `
- function(doc, oldDoc) {
- if (doc.type == "reject_me") {
- throw({forbidden : "Rejected document"})
- } else if (doc.type == "bar") {
- // add "bar" docs to the "important" channel
- channel("important");
- } else if (doc.type == "secret") {
- if (!doc.owner) {
- throw({forbidden : "Secret documents \ must have an owner field"})
- }
- } else {
- // all other documents just go into all channels listed in the doc["channels"] field
- channel(doc.channels)
- }
- }
- `
- },
- }
- }
- """
-
- valid_json = convert_to_valid_json(invalid_json)
-
- got_exception = True
- try:
- parsed_json = json.loads(valid_json)
- json.dumps(parsed_json, indent=4)
- got_exception = False
- except Exception as e:
- print("Exception: {0}".format(e))
-
- assert got_exception is False, "Failed to convert to valid JSON"
-
-
-class TestLowerKeys(unittest.TestCase):
-
- def test_basic(self):
- json_text_input = """{
- "Name": "Couchbase, Inc.",
- "Address": {
- "Street": "3250 Olcott St",
- "City": "Santa Clara",
- "State": "CA",
- "Zip_Code": 95054
- },
- "Products": [
- "Couchbase Server",
- "Sync Gateway",
- "Couchbase Lite"
- ]
- }"""
- json_dict_actual = lower_keys_dict(json_text_input)
- json_dict_expected = json.loads("""{
- "name": "Couchbase, Inc.",
- "address": {
- "street": "3250 Olcott St",
- "city": "Santa Clara",
- "state": "CA",
- "zip_code": 95054
- },
- "products": [
- "Couchbase Server",
- "Sync Gateway",
- "Couchbase Lite"
- ]
- }""")
-
- # Sort the lists(if any) in both dictionaries before dumping to a string.
- json_text_actual = json.dumps(json_dict_actual, sort_keys=True)
- json_text_expected = json.dumps(json_dict_expected, sort_keys=True)
- assert json_text_expected == json_text_actual
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tools/sgcollect/main_test.go b/tools/sgcollect/main_test.go
new file mode 100644
index 0000000000..5fd0b3ab79
--- /dev/null
+++ b/tools/sgcollect/main_test.go
@@ -0,0 +1,22 @@
+/*
+Copyright 2020-Present Couchbase, Inc.
+
+Use of this software is governed by the Business Source License included in
+the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
+file, in accordance with the Business Source License, use of this software will
+be governed by the Apache License, Version 2.0, included in the file
+licenses/APL2.txt.
+*/
+
+package main
+
+import (
+ "testing"
+
+ "github.com/couchbase/sync_gateway/db"
+)
+
+func TestMain(m *testing.M) {
+ memWatermarkThresholdMB := uint64(2048)
+ db.TestBucketPoolWithIndexes(m, memWatermarkThresholdMB)
+}
diff --git a/tools/sgcollect/password_remover.go b/tools/sgcollect/password_remover.go
new file mode 100644
index 0000000000..6cac5f83ac
--- /dev/null
+++ b/tools/sgcollect/password_remover.go
@@ -0,0 +1,141 @@
+package main
+
+import (
+ "fmt"
+ "net/url"
+ "reflect"
+)
+
+func walkInner(val reflect.Value, fullKey string, walker func(key, fullKey string, m map[string]any) bool) error {
+ switch val.Kind() {
+ case reflect.Map:
+ if val.Type().Key().Kind() != reflect.String {
+ return fmt.Errorf("walkInner %s: invalid key type %v", fullKey, val.Type().Key())
+ }
+ for _, k := range val.MapKeys() {
+ newFullKey := fmt.Sprintf("%s.%s", fullKey, k.String())
+ if newFullKey[0] == '.' {
+ newFullKey = newFullKey[1:]
+ }
+ if walker(k.String(), newFullKey, val.Interface().(map[string]any)) {
+ mv := val.MapIndex(k)
+ if mv.Kind() == reflect.Interface || mv.Kind() == reflect.Ptr {
+ mv = mv.Elem()
+ }
+ if mv.Kind() != reflect.Map {
+ continue
+ }
+ err := walkInner(mv, newFullKey, walker)
+ if err != nil {
+ return err
+ }
+ }
+ }
+ case reflect.Slice:
+ for i := 0; i < val.Len(); i++ {
+ v := val.Index(i)
+ if v.Kind() == reflect.Interface || v.Kind() == reflect.Ptr {
+ v = v.Elem()
+ }
+ if v.Kind() != reflect.Map && v.Kind() != reflect.Slice {
+ continue
+ }
+ err := walkInner(v, fmt.Sprintf("%s[%d]", fullKey, i), walker)
+ if err != nil {
+ return err
+ }
+ }
+ default:
+ return fmt.Errorf("walkInner %s: invalid type %v", fullKey, val.Type())
+ }
+ return nil
+}
+
+// walkJSON walks a JSON object decoded by json.Unmarshal into a map[string]any value.
+// It calls the given walker on every nested object field. walkJSON steps through arrays, i.e., if the arrays themselves
+// contain objects it will call walker on them, but it will not call walker on other array members.
+// The walker will be called with the current value's key, the full dot-separated path to it, and the map itself.
+// Any changes to the map by the walker other than modifying a scalar value (such as inserting or removing elements)
+// may or may not be visible in subsequent calls to the walker.
+// The walker can return false to avoid recursing further into the current object.
+func walkJSON(m any, walker func(key, fullKey string, m map[string]any) bool) error {
+ return walkInner(reflect.ValueOf(m), "", walker)
+}
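+
+// Illustrative usage sketch (an assumption, not part of this change): a walker
+// that prints the full path of every "password" field in a decoded config:
+//
+//	var cfg map[string]any
+//	_ = json.Unmarshal(raw, &cfg) // raw: hypothetical []byte of JSON
+//	_ = walkJSON(cfg, func(key, fullKey string, m map[string]any) bool {
+//		if key == "password" {
+//			fmt.Println(fullKey) // e.g. "databases.db.password"
+//		}
+//		return true // keep recursing into nested objects
+//	})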
+
+const maskedPassword = "*****"
+
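+// RemovePasswordsAndTagUserData walks a decoded Sync Gateway config, masking
+// "password" fields and the userinfo of "server" URLs, and wrapping usernames,
+// user/role names, admin_channels, and admin_roles in <ud>...</ud> redaction
+// tags. The map is modified in place.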
+func RemovePasswordsAndTagUserData(val map[string]any) error {
+ return walkJSON(val, func(key, fullKey string, m map[string]any) bool {
+ switch key {
+ case "password":
+ m[key] = maskedPassword
+ return false
+ case "server":
+ if sv, ok := m[key].(string); ok {
+ m[key] = stripPasswordFromURL(sv)
+ }
+ return false
+ case "username":
+ if sv, ok := m[key].(string); ok {
+ m[key] = UD(sv)
+ }
+ return false
+ case "users", "roles":
+ // We need to modify map keys, so the usual recursion behaviour isn't sufficient
+ m = m[key].(map[string]any)
+ for k, v := range m {
+ if len(k) > 4 && k[:4] == "" {
+ continue
+ }
+ userVal := v
+ m[UD(k)] = userVal
+ delete(m, k)
+ }
+ for _, v := range m {
+ userInfo, ok := v.(map[string]any)
+ if !ok {
+ return false
+ }
+ if name, ok := userInfo["name"].(string); ok {
+ userInfo["name"] = UD(name)
+ }
+ if _, ok := userInfo["password"]; ok {
+ userInfo["password"] = maskedPassword
+ }
+ if chans, ok := userInfo["admin_channels"].([]any); ok {
+ for i, ch := range chans {
+ if strVal, ok := ch.(string); ok {
+ chans[i] = UD(strVal)
+ }
+ }
+ }
+ if roles, ok := userInfo["admin_roles"].([]any); ok {
+ for i, role := range roles {
+ if strVal, ok := role.(string); ok {
+ roles[i] = UD(strVal)
+ }
+ }
+ }
+ }
+ return false
+ default:
+ return true
+ }
+ })
+}
+
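+// stripPasswordFromURL replaces the password in a URL's userinfo with a mask,
+// e.g. "couchbase://user:secret@host" becomes "couchbase://user:*****@host".
+// URLs that fail to parse or carry no password are returned unchanged.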
+func stripPasswordFromURL(urlStr string) string {
+ urlVal, err := url.Parse(urlStr)
+ if err != nil {
+ return urlStr
+ }
+ if _, ok := urlVal.User.Password(); !ok {
+ return urlStr
+ }
+ return fmt.Sprintf(`%s://%s:%s@%s%s`,
+ urlVal.Scheme, urlVal.User.Username(), maskedPassword, urlVal.Host, urlVal.Path)
+}
+
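+// UD wraps the given string in <ud>...</ud> tags, marking it as user data for
+// post-process redaction.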
+func UD(s string) string {
+ return "" + s + ""
+}
diff --git a/tools/sgcollect/password_remover_test.go b/tools/sgcollect/password_remover_test.go
new file mode 100644
index 0000000000..ebb6e3d756
--- /dev/null
+++ b/tools/sgcollect/password_remover_test.go
@@ -0,0 +1,65 @@
+package main
+
+import (
+ "testing"
+
+ "github.com/couchbase/sync_gateway/base"
+ "github.com/stretchr/testify/require"
+)
+
+func mustParseJSON(t *testing.T, val string) map[string]any {
+ var ret map[string]any
+ err := base.JSONUnmarshal([]byte(val), &ret)
+ require.NoError(t, err)
+ return ret
+}
+
+func TestRemovePasswordsAndTagUserData(t *testing.T) {
+ data := mustParseJSON(t, `{
+ "databases": {
+ "foo": {
+ "bucket": "foo",
+ "username": "Administrator",
+ "password": "longpassword",
+ "server": "couchbase://foo:bar@cbserver:8091/invalid",
+ "roles": {
+ "bar": {
+ "admin_channels": ["beans"]
+ }
+ },
+ "users": {
+ "GUEST": {
+ "name": "guest",
+ "password": "securepassword",
+ "admin_channels": ["foo", "bar"],
+ "admin_roles": ["baz"]
+ }
+ }
+ }
+ }
+}`)
+ require.NoError(t, RemovePasswordsAndTagUserData(data))
+ require.Equal(t, mustParseJSON(t, `{
+ "databases": {
+ "foo": {
+ "bucket": "foo",
+ "username": "Administrator",
+ "password": "*****",
+ "server": "couchbase://foo:*****@cbserver:8091/invalid",
+ "roles": {
+ "bar": {
+ "admin_channels": ["beans"]
+ }
+ },
+ "users": {
+ "GUEST": {
+ "name": "guest",
+ "password": "*****",
+ "admin_channels": ["foo", "bar"],
+ "admin_roles": ["baz"]
+ }
+ }
+ }
+ }
+}`), data)
+}
diff --git a/tools/sgcollect/redact.go b/tools/sgcollect/redact.go
new file mode 100644
index 0000000000..41e873fb46
--- /dev/null
+++ b/tools/sgcollect/redact.go
@@ -0,0 +1,134 @@
+package main
+
+import (
+ "bufio"
+ "crypto/sha1"
+ "encoding/hex"
+ "errors"
+ "io"
+ "log"
+)
+
+// CopyFunc is the signature of io.Copy.
+type CopyFunc func(io.Writer, io.Reader) (int64, error)
+
+// RedactCopier returns a CopyFunc appropriate for the configured redaction level.
+func RedactCopier(opts *SGCollectOptions) CopyFunc {
+ if opts.LogRedactionLevel == RedactNone {
+ return io.Copy
+ }
+ // implementation of io.Copy that also redacts UD data
+ return func(dst io.Writer, src io.Reader) (int64, error) {
+ var written int64
+ var err error
+
+ flush := func(chunk []byte) error {
+ nw, wErr := dst.Write(chunk)
+ if nw < 0 || nw > len(chunk) {
+ nw = 0
+ if wErr == nil {
+ wErr = errors.New("invalid write")
+ }
+ }
+ written += int64(nw)
+ if errors.Is(wErr, io.EOF) {
+ wErr = nil // match the io.Copy protocol
+ }
+ if wErr != nil {
+ return wErr
+ }
+ if len(chunk) != nw {
+ return errors.New("short write")
+ }
+ return nil
+ }
+
+ br := bufio.NewReader(src)
+ var tmp []byte
+ redactBuf := make([]byte, 0, 32*1024)
+ depth := 0
+ for {
+ chunk, readErr := br.ReadBytes('<')
+ if errors.Is(readErr, io.EOF) {
+ if depth > 0 {
+ log.Println("WARN: mismatched UD tag")
+ err = flush(append([]byte(""), chunk...))
+ } else {
+ err = flush(chunk)
+ }
+ break
+ }
+ if readErr != nil {
+ err = readErr
+ break
+ }
+ // Check if the next tag is an opening or closing tag.
+ tmp, err = br.Peek(4)
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ log.Printf("WARN: Corrupt redaction tag")
+ err = flush(chunk)
+ if err != nil {
+ break
+ }
+ continue
+ }
+ break
+ }
+ if string(tmp[:3]) == "ud>" {
+ // opening
+ if depth == 0 {
+ // need to first write out everything up to the opening <
+ err = flush(chunk[:len(chunk)-1])
+ if err != nil {
+ break
+ }
+ // and then discard the remainder of the opening tag, as it doesn't get redacted (its contents do)
+ _, err = br.Discard(3)
+ if err != nil {
+ break
+ }
+ // now the br is just after the opening <ud>
+ } else {
+ // need to push the entire chunk into the redact buffer, *including* this opening because it's nested
+ redactBuf = append(redactBuf, chunk...)
+ }
+ depth++
+ // continue reading until we either hit the end of the source or find the closing UD
+ continue
+ } else if string(tmp[:4]) == "/ud>" {
+ // closing
+ depth--
+ if depth == 0 {
+ // chunk is now the complete redactable area plus the trailing '<' of the
+ // closing </ud>; discard the remaining "/ud>".
+ _, err = br.Discard(4)
+ if err != nil {
+ break
+ }
+ // now the br is just after the closing </ud>
+ redactBuf = append(redactBuf, chunk[:len(chunk)-1]...)
+ sumInput := append([]byte(opts.LogRedactionSalt), redactBuf...)
+ digest := sha1.Sum(sumInput) //nolint:gosec
+ chunk = append(append([]byte(""), hex.EncodeToString(digest[:])...), []byte("")...)
+ redactBuf = make([]byte, 0, 32*1024)
+ }
+ }
+ // It's not an opening tag: either it's a closing tag still inside a redaction, or not a
+ // tag we care about. If we're inside a redaction tag, it gets added to the redaction
+ // buffer, otherwise it goes out as-is.
+ if depth > 0 {
+ redactBuf = append(redactBuf, chunk...)
+ } else {
+ err = flush(chunk)
+ if err != nil {
+ break
+ }
+ }
+ }
+ return written, err
+ }
+}
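+
+// Illustrative usage sketch (an assumption, not part of this change): redacting
+// an in-memory string with partial redaction configured:
+//
+//	opts := &SGCollectOptions{LogRedactionLevel: RedactPartial, LogRedactionSalt: "salt"}
+//	var buf bytes.Buffer
+//	_, _ = RedactCopier(opts)(&buf, strings.NewReader("user <ud>alice</ud> logged in"))
+//	// buf now holds "user <ud>" + hex(sha1("salt"+"alice")) + "</ud> logged in"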
diff --git a/tools/sgcollect/redact_test.go b/tools/sgcollect/redact_test.go
new file mode 100644
index 0000000000..620025a3b2
--- /dev/null
+++ b/tools/sgcollect/redact_test.go
@@ -0,0 +1,131 @@
+package main
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "encoding/hex"
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestRedactCopy(t *testing.T) {
+ opts := &SGCollectOptions{
+ LogRedactionSalt: "SALT",
+ }
+ redacted := func(in string) string {
+ digest := sha1.Sum([]byte(string(opts.LogRedactionSalt) + in)) //nolint:gosec
+ return hex.EncodeToString(digest[:])
+ }
+ cases := []struct {
+ Name string
+ Input, Expected string
+ }{
+ {
+ Name: "no redaction",
+ Input: "foo bar",
+ Expected: "foo bar",
+ },
+ {
+ Name: "simple",
+ Input: "foo <ud>bar</ud> baz",
+ Expected: fmt.Sprintf("foo <ud>%s</ud> baz", redacted("bar")),
+ },
+ {
+ Name: "nested",
+ Input: "foo <ud>bar<ud>baz</ud>qux</ud> baz",
+ Expected: fmt.Sprintf("foo <ud>%s</ud> baz", redacted("bar<ud>baz</ud>qux")),
+ },
+ {
+ Name: "multiple",
+ Input: "foo <ud>bar</ud> baz <ud>qux</ud>",
+ Expected: fmt.Sprintf("foo <ud>%s</ud> baz <ud>%s</ud>", redacted("bar"), redacted("qux")),
+ },
+ {
+ Name: "only",
+ Input: "<ud>foo</ud>",
+ Expected: fmt.Sprintf("<ud>%s</ud>", redacted("foo")),
+ },
+ {
+ Name: "at start",
+ Input: "<ud>foo</ud> bar",
+ Expected: fmt.Sprintf("<ud>%s</ud> bar", redacted("foo")),
+ },
+ {
+ Name: "at end",
+ Input: "foo <ud>bar</ud>",
+ Expected: fmt.Sprintf("foo <ud>%s</ud>", redacted("bar")),
+ },
+ {
+ Name: "corrupt",
+ Input: "foo <ud>bar",
+ Expected: "foo <ud>bar",
+ },
+ }
+ for _, tc := range cases {
+ t.Run(tc.Name, func(t *testing.T) {
+ var buf bytes.Buffer
+ n, err := RedactCopier(opts)(&buf, strings.NewReader(tc.Input))
+ require.NoError(t, err)
+ require.Equal(t, tc.Expected, buf.String())
+ require.Equal(t, int64(buf.Len()), n)
+ })
+ }
+}
+
+func FuzzRedactCopy(f *testing.F) {
+ opts := &SGCollectOptions{
+ LogRedactionSalt: "SALT",
+ }
+ f.Add("foo bar")
+ f.Add("foo bar baz")
+ f.Fuzz(func(t *testing.T, in string) {
+ var buf bytes.Buffer
+ n, err := RedactCopier(opts)(&buf, strings.NewReader(in))
+ require.NoError(t, err)
+ require.Equal(t, int64(buf.Len()), n)
+ })
+}
+
+// Verifies that RedactCopier doesn't change its input if it has nothing to do.
+func FuzzRedactCopyIdempotent(f *testing.F) {
+ opts := &SGCollectOptions{
+ LogRedactionSalt: "SALT",
+ }
+ f.Add("foo bar")
+ f.Fuzz(func(t *testing.T, in string) {
+ if strings.Contains(in, "") && strings.Contains(in, "") {
+ t.SkipNow()
+ }
+ var buf bytes.Buffer
+ n, err := RedactCopier(opts)(&buf, strings.NewReader(in))
+ require.NoError(t, err)
+ require.Equal(t, int64(buf.Len()), n)
+ require.Equal(t, buf.String(), in)
+ })
+}
+
+func FuzzRedactCopyMiddle(f *testing.F) {
+ opts := &SGCollectOptions{
+ LogRedactionSalt: "SALT",
+ }
+ redacted := func(in string) string {
+ digest := sha1.Sum([]byte(string(opts.LogRedactionSalt) + in)) //nolint:gosec
+ return hex.EncodeToString(digest[:])
+ }
+ f.Add("foo", "bar", "baz")
+ f.Fuzz(func(t *testing.T, s1, s2, s3 string) {
+ var buf bytes.Buffer
+ n, err := RedactCopier(opts)(&buf, strings.NewReader(fmt.Sprintf("%s%s%s", s1, s2, s3)))
+ require.NoError(t, err)
+ require.Equal(t, int64(buf.Len()), n)
+ require.Equal(t, buf.String(), fmt.Sprintf("%s%s%s", s1, redacted(s2), s3))
+ })
+}
diff --git a/tools/sgcollect/sgcollect_info.go b/tools/sgcollect/sgcollect_info.go
new file mode 100644
index 0000000000..c4f780ea9d
--- /dev/null
+++ b/tools/sgcollect/sgcollect_info.go
@@ -0,0 +1,356 @@
+package main
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "net/url"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/google/uuid"
+ "gopkg.in/alecthomas/kingpin.v2"
+)
+
+type LogRedactionLevel string
+
+const (
+ RedactNone LogRedactionLevel = "none"
+ RedactPartial LogRedactionLevel = "partial"
+)
+
+// PasswordString is a string with marshalers that avoid accidentally printing it. It also makes it
+// harder to accidentally pass it to callers that won't know how to handle it properly.
+type PasswordString string
+
+func (p PasswordString) GoString() string {
+ return strings.Repeat("*", len(p))
+}
+
+func (p PasswordString) MarshalText() ([]byte, error) {
+ return bytes.Repeat([]byte("*"), len(p)), nil
+}
+
+type SGCollectOptions struct {
+ OutputPath string
+ RootDir string
+ LogRedactionLevel LogRedactionLevel
+ LogRedactionSalt PasswordString
+ SyncGatewayURL *url.URL
+ SyncGatewayConfig string
+ SyncGatewayExecutable string
+ SyncGatewayUsername string
+ SyncGatewayPassword PasswordString
+ HTTPTimeout time.Duration
+ TmpDir string
+ UploadHost *url.URL
+ UploadCustomer string
+ UploadTicketNumber string
+ UploadProxy *url.URL
+}
+
+func (opts *SGCollectOptions) ParseCommandLine(args []string) error {
+ app := kingpin.New("sgcollect_info", "")
+ app.Flag("root-dir", "root directory of Sync Gateway installation").StringVar(&opts.RootDir)
+ app.Flag("log-redaction-level", "whether to redact logs. If enabled, two copies of the logs will be collected, one redacted and one unredacted.").
+ Default("none").EnumVar((*string)(&opts.LogRedactionLevel), "none", "partial")
+ app.Flag("log-redaction-salt", "salt to use when hashing user data in redacted logs. By default a random string is generated.").
+ Default(uuid.New().String()).StringVar((*string)(&opts.LogRedactionSalt))
+ app.Flag("sync-gateway-url", "URL of the admin interface of the running Sync Gateway").URLVar(&opts.SyncGatewayURL)
+ app.Flag("sync-gateway-username", "credentials for the Sync Gateway admin interfarce").StringVar(&opts.SyncGatewayUsername)
+ app.Flag("sync-gateway-password", "credentials for the Sync Gateway admin interfarce").StringVar((*string)(&opts.SyncGatewayPassword))
+ app.Flag("sync-gateway-config", "path to the Sync Gateway bootstrap configuration file. If left blank, will attempt to find automatically.").
+ ExistingFileVar(&opts.SyncGatewayConfig)
+ app.Flag("sync-gateway-executable", "path to the Sync Gateway binary. If left blank, will attempt to find automatically.").
+ ExistingFileVar(&opts.SyncGatewayExecutable)
+ app.Flag("http-timeout", "timeout for HTTP requests made by sgcollect_info. Does not apply to log uploads.").
+ Default("30s").DurationVar(&opts.HTTPTimeout)
+ app.Flag("tmp-dir", "temporary directory to use while gathering logs. If left blank, one will automatically be created.").ExistingDirVar(&opts.TmpDir)
+ app.Flag("upload-host", "server to upload logs to when instructed by Couchbase Technical Support").URLVar(&opts.UploadHost)
+ app.Flag("customer", "customer name to use in conjunction with upload-host").StringVar(&opts.UploadCustomer)
+ app.Flag("ticket", "ticket number to use in conjunction with upload-host").StringVar(&opts.UploadTicketNumber)
+ app.Flag("upload-proxy", "HTTP proxy to use when uploading logs").URLVar(&opts.UploadProxy)
+ app.Arg("path", "path to a ZIP file (will be created) to collect diagnostics into").Required().StringVar(&opts.OutputPath)
+ _, err := app.Parse(args)
+ return err
+}
+
+var (
+ httpClient *http.Client
+ httpClientInit sync.Once
+)
+
+func getHTTPClient(opts *SGCollectOptions) *http.Client {
+ httpClientInit.Do(func() {
+ httpClient = &http.Client{
+ Timeout: opts.HTTPTimeout,
+ }
+ })
+ return httpClient
+}
+
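+// getJSONOverHTTP performs a GET against the given URL using the configured
+// Sync Gateway admin credentials, decoding the JSON response into result.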
+func getJSONOverHTTP(url string, opts *SGCollectOptions, result any) error {
+ req, err := http.NewRequest(http.MethodGet, url, nil)
+ if err != nil {
+ return fmt.Errorf("failed to build HTTP request: %w", err)
+ }
+ req.SetBasicAuth(opts.SyncGatewayUsername, string(opts.SyncGatewayPassword))
+
+ res, err := getHTTPClient(opts).Do(req)
+ if err != nil {
+ return fmt.Errorf("failed to execute HTTP request: %w", err)
+ }
+ defer res.Body.Close()
+
+ err = json.NewDecoder(res.Body).Decode(result)
+ if err != nil {
+ return fmt.Errorf("failed to decode response body: %w", err)
+ }
+ return nil
+}
+
+// determineSGURL attempts to find the Sync Gateway admin interface URL, starting with the one given in the options, then
+// a default if one is not specified.
+// Returns true if the URL is valid and reachable.
+func determineSGURL(opts *SGCollectOptions) (*url.URL, bool) {
+ sgURL := opts.SyncGatewayURL
+ if sgURL == nil {
+ sgURL, _ = url.Parse("http://127.0.0.1:4985")
+ }
+ log.Printf("Trying Sync Gateway URL: %s", sgURL)
+
+ var root map[string]any
+ err := getJSONOverHTTP(sgURL.String(), opts, &root)
+ if err == nil {
+ return sgURL, true
+ }
+ log.Printf("Failed to communicate with %s: %v", sgURL, err)
+
+ // try HTTPS instead
+ httpsURL := *sgURL
+ httpsURL.Scheme = "https"
+ log.Printf("Trying Sync Gateway URL: %s", httpsURL.String())
+ err = getJSONOverHTTP(httpsURL.String(), opts, &root)
+ if err == nil {
+ return &httpsURL, true
+ }
+ log.Printf("Failed to communicate with %s: %v", httpsURL.String(), err)
+
+ return sgURL, false
+}
+
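+// findSGBinaryAndConfigsFromExpvars fetches the process command line from the
+// Sync Gateway /_expvar endpoint, returning the binary path, the first ".json"
+// argument as the config path, and whether a config was found.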
+func findSGBinaryAndConfigsFromExpvars(sgURL *url.URL, opts *SGCollectOptions) (string, string, bool) {
+ // Get path to sg binary (reliable) and config (not reliable)
+ var expvars struct {
+ CmdLine []string `json:"cmdline"`
+ }
+ err := getJSONOverHTTP(sgURL.String()+"/_expvar", opts, &expvars)
+ if err != nil {
+ log.Printf("findSGBinaryAndConfigsFromExpvars: Failed to get SG expvars: %v", err)
+ }
+
+ if len(expvars.CmdLine) == 0 {
+ return "", "", false
+ }
+
+ binary := expvars.CmdLine[0]
+ var config string
+ for _, arg := range expvars.CmdLine[1:] {
+ if strings.HasSuffix(arg, ".json") {
+ config = arg
+ break
+ }
+ }
+ return binary, config, config != ""
+}
+
+var sgBinPaths = [...]string{
+ "/opt/couchbase-sync-gateway/bin/sync_gateway",
+ `C:\Program Files (x86)\Couchbase\sync_gateway.exe`,
+ `C:\Program Files\Couchbase\Sync Gateway\sync_gateway.exe`,
+ "./sync_gateway",
+}
+
+var bootstrapConfigLocations = [...]string{
+ "/home/sync_gateway/sync_gateway.json",
+ "/opt/couchbase-sync-gateway/etc/sync_gateway.json",
+ "/opt/sync_gateway/etc/sync_gateway.json",
+ "/etc/sync_gateway/sync_gateway.json",
+ `C:\Program Files (x86)\Couchbase\serviceconfig.json`,
+ `C:\Program Files\Couchbase\Sync Gateway\serviceconfig.json`,
+ "./sync_gateway.json",
+}
+
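+// findSGBinaryAndConfigs locates the Sync Gateway binary and bootstrap config,
+// preferring values passed on the command line, then expvars, and finally a
+// set of well-known install locations.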
+func findSGBinaryAndConfigs(sgURL *url.URL, opts *SGCollectOptions) (string, string) {
+ // If the user manually passed some in, use those.
+ binary := opts.SyncGatewayExecutable
+ config := opts.SyncGatewayConfig
+ if binary != "" && config != "" {
+ log.Printf("Using manually passed SG binary at %q and config at %q.", binary, config)
+ return binary, config
+ }
+
+ var ok bool
+ binary, config, ok = findSGBinaryAndConfigsFromExpvars(sgURL, opts)
+ if ok {
+ log.Printf("SG binary at %q and config at %q.", binary, config)
+ return binary, config
+ }
+
+ for _, path := range sgBinPaths {
+ if _, err := os.Stat(path); err == nil {
+ binary = path
+ break
+ }
+ }
+
+ for _, path := range bootstrapConfigLocations {
+ if _, err := os.Stat(path); err == nil {
+ config = path
+ break
+ }
+ }
+ log.Printf("SG binary at %q and config at %q.", binary, config)
+ return binary, config
+}
+
+func main() {
+ opts := &SGCollectOptions{}
+ if err := opts.ParseCommandLine(os.Args[1:]); err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+
+ tr, err := NewTaskRunner(opts)
+ if err != nil {
+ log.Fatal(err)
+ }
+ err = tr.SetupSGCollectLog()
+ if err != nil {
+ log.Printf("Failed to set up sgcollect_info.log: %v. Will continue.", err)
+ }
+
+ sgURL, ok := determineSGURL(opts)
+ if !ok {
+ log.Println("Failed to communicate with Sync Gateway. Check that Sync Gateway is reachable.")
+ log.Println("Will attempt to continue, but some information may be unavailable, which may make troubleshooting difficult.")
+ }
+
+ // Build path to zip directory, make sure it exists
+ zipFilename := opts.OutputPath
+ if !strings.HasSuffix(zipFilename, ".zip") {
+ zipFilename += ".zip"
+ }
+ zipDir := filepath.Dir(zipFilename)
+ _, err = os.Stat(zipDir)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ log.Fatalf("Output directory %s does not exist.", zipDir)
+ } else {
+ log.Fatalf("Failed to check if output directory (%s) is accesible: %v", zipDir, err)
+ }
+ }
+
+ shouldRedact := opts.LogRedactionLevel != RedactNone
+ var redactedZipFilename string
+ var uploadFilename string
+ if shouldRedact {
+ redactedZipFilename = strings.TrimSuffix(zipFilename, ".zip") + "-redacted.zip"
+ uploadFilename = redactedZipFilename
+ } else {
+ uploadFilename = zipFilename
+ }
+
+ var config ServerConfig
+ err = getJSONOverHTTP(sgURL.String()+"/_config?include_runtime=true", opts, &config)
+ if err != nil {
+ log.Printf("Failed to get SG config. Some information might not be collected.")
+ }
+
+ for _, task := range MakeAllTasks(sgURL, opts, config) {
+ tr.Run(task)
+ }
+
+ tr.Finalize()
+ log.Printf("Writing unredacted logs to %s", zipFilename)
+ hostname, _ := os.Hostname()
+ prefix := fmt.Sprintf("sgcollect_info_%s_%s", hostname, time.Now().Format("20060102-150405"))
+ err = tr.ZipResults(zipFilename, prefix, io.Copy)
+ if err != nil {
+ log.Printf("WARNING: failed to produce output file %s: %v", zipFilename, err)
+ }
+ if shouldRedact {
+ log.Printf("Writing redacted logs to %s", redactedZipFilename)
+ err = tr.ZipResults(redactedZipFilename, prefix, RedactCopier(opts))
+ if err != nil {
+ log.Printf("WARNING: failed to produce output file %s: %v", redactedZipFilename, err)
+ }
+ }
+
+ if opts.UploadHost != nil && opts.UploadCustomer != "" {
+ err = UploadFile(opts, uploadFilename)
+ if err != nil {
+ log.Printf("Uploading logs failed! %v", err)
+ log.Println("Please upload the logs manually, using the instructions given to you by Couchbase Technical Support.")
+ }
+ }
+
+ log.Println("Done.")
+}
+
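+// UploadFile PUTs the given ZIP archive to the configured upload host, under a
+// path built from the customer name and, if set, the ticket number.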
+func UploadFile(opts *SGCollectOptions, uploadFilename string) error {
+ uploadURL := *opts.UploadHost
+ uploadURL.Path += fmt.Sprintf("/%s/", opts.UploadCustomer)
+ if opts.UploadTicketNumber != "" {
+ uploadURL.Path += fmt.Sprintf("%s/", opts.UploadTicketNumber)
+ }
+ uploadURL.Path += filepath.Base(uploadFilename)
+ log.Printf("Uploading archive to %s...", uploadURL.String())
+
+ fd, err := os.Open(uploadFilename)
+ if err != nil {
+ return fmt.Errorf("failed to prepare file for upload: %w", err)
+ }
+ defer fd.Close()
+ stat, err := fd.Stat()
+ if err != nil {
+ return fmt.Errorf("failed to stat upload file: %w", err)
+ }
+
+ req, err := http.NewRequest(http.MethodPut, uploadURL.String(), fd)
+ if err != nil {
+ return fmt.Errorf("failed to create upload request: %w", err)
+ }
+ req.Header.Set("Content-Type", "application/zip")
+ req.ContentLength = stat.Size()
+
+ var proxy func(*http.Request) (*url.URL, error)
+ if opts.UploadProxy != nil {
+ proxy = http.ProxyURL(opts.UploadProxy)
+ } else {
+ proxy = http.ProxyFromEnvironment
+ }
+ httpClient := &http.Client{
+ Transport: &http.Transport{
+ Proxy: proxy,
+ },
+ }
+ res, err := httpClient.Do(req)
+ if err != nil {
+ return fmt.Errorf("failed to perform request: %w", err)
+ }
+ defer res.Body.Close()
+ if res.StatusCode != 200 {
+ log.Printf("WARN: upload gave unexpected status %s", res.Status)
+ body, _ := io.ReadAll(res.Body)
+ log.Println(string(body))
+ }
+ return nil
+}
diff --git a/tools/sgcollect/sgcollect_info_test.go b/tools/sgcollect/sgcollect_info_test.go
new file mode 100644
index 0000000000..0f9ba1b7c8
--- /dev/null
+++ b/tools/sgcollect/sgcollect_info_test.go
@@ -0,0 +1,89 @@
+package main
+
+import (
+ "archive/zip"
+ "bytes"
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "net/url"
+ "path/filepath"
+ "testing"
+
+ "github.com/couchbase/sync_gateway/rest"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestSGCollectTasks(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+
+ restTesterConfig := rest.RestTesterConfig{DatabaseConfig: &rest.DatabaseConfig{DbConfig: rest.DbConfig{}}}
+ restTester := rest.NewRestTester(t, &restTesterConfig)
+ require.NoError(t, restTester.SetAdminParty(false))
+ defer restTester.Close()
+
+ mockSyncGateway := httptest.NewServer(restTester.TestAdminHandler())
+ defer mockSyncGateway.Close()
+ mockSyncGatewayURL, _ := url.Parse(mockSyncGateway.URL)
+
+ tasks := MakeAllTasks(mockSyncGatewayURL, &SGCollectOptions{}, ServerConfig{
+ Databases: map[string]any{
+ "db": restTester.DatabaseConfig.DbConfig,
+ },
+ })
+ tr := NewTaskTester(t, SGCollectOptions{})
+ for _, task := range tasks {
+ output, res := tr.RunTask(task)
+ tex := TaskEx(task)
+ if tex.mayFailTest {
+ if res.Error != nil {
+ t.Logf("Failed to run %s [%s] - marked as may fail, so not failing test", task.Name(), task.Header())
+ t.Logf("Error: %v", res.Error)
+ t.Logf("Output: %s", output.String())
+ }
+ } else {
+ if !AssertDidNotFail(t, res) {
+ t.Logf("Output: %s", output.String())
+ }
+ }
+ }
+}
+
+func TestCollectZipAndUpload(t *testing.T) {
+ uploadServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ assert.Equal(t, http.MethodPut, r.Method)
+ assert.Equal(t, "/sgwdev/12345/test.zip", r.URL.Path)
+ assert.NotZero(t, r.ContentLength)
+
+ body, err := io.ReadAll(r.Body)
+ require.NoError(t, err)
+ buf := bytes.NewReader(body)
+ reader, err := zip.NewReader(buf, int64(len(body)))
+ require.NoError(t, err)
+ assert.Len(t, reader.File, 1)
+ w.WriteHeader(http.StatusOK)
+ }))
+ defer uploadServer.Close()
+ uploadURL, _ := url.Parse(uploadServer.URL)
+ opts := &SGCollectOptions{
+ UploadHost: uploadURL,
+ UploadCustomer: "sgwdev",
+ UploadTicketNumber: "12345",
+ }
+ tr, err := NewTaskRunner(opts)
+ require.NoError(t, err)
+ tr.Run(&RawStringTask{
+ name: "test",
+ val: "test",
+ })
+ tr.Finalize()
+
+ tmpDir := t.TempDir()
+ tmpPath := filepath.Join(tmpDir, "test.zip")
+ require.NoError(t, tr.ZipResults(tmpPath, "test", io.Copy))
+
+ require.NoError(t, UploadFile(opts, tmpPath))
+}
diff --git a/tools/sgcollect/task_runner.go b/tools/sgcollect/task_runner.go
new file mode 100644
index 0000000000..93f3dad550
--- /dev/null
+++ b/tools/sgcollect/task_runner.go
@@ -0,0 +1,256 @@
+package main
+
+import (
+ "archive/zip"
+ "context"
+ "fmt"
+ "io"
+ "io/fs"
+ "log"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "time"
+)
+
+const defaultOutputFile = "sync_gateway.log"
+
+type TaskRunner struct {
+ tmpDir string
+ startTime time.Time
+ files map[string]*os.File
+ opts *SGCollectOptions
+}
+
+func NewTaskRunner(opts *SGCollectOptions) (*TaskRunner, error) {
+ tr := &TaskRunner{
+ startTime: time.Now(),
+ files: make(map[string]*os.File),
+ opts: opts,
+ }
+ var err error
+ tr.tmpDir, err = os.MkdirTemp(opts.TmpDir, fmt.Sprintf("sgcollect_info-%s-*", tr.startTime.Format("2006-01-02T15:04:05Z07")))
+ if err != nil {
+ return nil, fmt.Errorf("could not use temporary dir: %w", err)
+ }
+ log.Printf("Using temporary directory %s", tr.tmpDir)
+ return tr, nil
+}
+
+func (tr *TaskRunner) Finalize() {
+ log.Println("Task runner finalizing...")
+ log.SetOutput(os.Stderr)
+ for _, fd := range tr.files {
+ err := fd.Close()
+ if err != nil {
+ log.Printf("Failed to close %s: %v", fd.Name(), err)
+ }
+ }
+}
+
+func (tr *TaskRunner) Cleanup() error {
+ return os.RemoveAll(tr.tmpDir)
+}
+
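+// ZipResults writes every file collected in the temporary directory into a ZIP
+// archive at outputPath, nested under prefix, using copier (which may redact)
+// to copy file contents. Per-file failures are logged rather than aborting the walk.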
+func (tr *TaskRunner) ZipResults(outputPath string, prefix string, copier CopyFunc) error {
+ fd, err := os.OpenFile(outputPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+ if err != nil {
+ return fmt.Errorf("failed to reate output: %w", err)
+ }
+ defer func(fd *os.File) {
+ err := fd.Close()
+ if err != nil {
+ log.Printf("WARN: failed to close unredacted output file: %v", err)
+ }
+ }(fd)
+
+ zw := zip.NewWriter(fd)
+ defer func(zw *zip.Writer) {
+ err := zw.Close()
+ if err != nil {
+ log.Printf("WARN: failed to close unredacted output zipper: %v", err)
+ }
+ }(zw)
+
+ err = filepath.WalkDir(tr.tmpDir, func(path string, d fs.DirEntry, err error) error {
+ if d.IsDir() {
+ return nil
+ }
+ // Returning a non-nil error will stop the walker completely - we want to capture as much as we can.
+ fileFd, err := os.Open(path)
+ if err != nil {
+ log.Printf("WARN: failed to open %s: %v", path, err)
+ return nil
+ }
+ defer fileFd.Close()
+
+ zipPath := prefix + string(os.PathSeparator) + strings.TrimPrefix(path, tr.tmpDir+string(os.PathSeparator)) // TODO: properly remove prefix
+ zipFile, err := zw.Create(zipPath)
+ if err != nil {
+ log.Printf("WARN: failed to open %s in zip: %v", zipPath, err)
+ return nil
+ }
+ _, err = copier(zipFile, fileFd)
+ if err != nil {
+ log.Printf("WARN: failed to copy to %s in zip: %v", zipPath, err)
+ }
+ return nil
+ })
+ if err != nil {
+ return fmt.Errorf("walker error: %w", err)
+ }
+ return nil
+}
+
+// SetupSGCollectLog will redirect the standard library log package's output to both stderr and a log file in the temporary directory.
+// After calling this, make sure to call Finalize, which will undo the change.
+func (tr *TaskRunner) SetupSGCollectLog() error {
+ fd, err := tr.createFile("sgcollect_info.log")
+ if err != nil {
+ return fmt.Errorf("failed to create sgcollect_info.log: %w", err)
+ }
+ tr.files["sgcollect_info.log"] = fd
+ log.SetOutput(io.MultiWriter(os.Stderr, fd))
+ return nil
+}
+
+func (tr *TaskRunner) createFile(name string) (*os.File, error) {
+ path := filepath.Join(tr.tmpDir, name)
+ return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
+}
+
+func (tr *TaskRunner) writeHeader(w io.Writer, task SGCollectTask) error {
+ separator := strings.Repeat("=", 78)
+ // example:
+ // ==============================================================================
+ // Collect server status
+ // main.SGCollectTaskEx (main.URLTask): http://127.0.0.1:4985/_status
+ // ==============================================================================
+ var err error
+ if tex, ok := task.(SGCollectTaskEx); ok {
+ _, err = fmt.Fprintf(w, "%s\n%s\n%T (%T): %s\n%s\n", separator, task.Name(), task, tex.SGCollectTask, task.Header(), separator)
+ } else {
+ _, err = fmt.Fprintf(w, "%s\n%s\n%T: %s\n%s\n", separator, task.Name(), task, task.Header(), separator)
+ }
+ return err
+}
+
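+// Run executes the given task, writing its output (preceded by a header,
+// unless suppressed) to the task's output file, which is created on first use.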
+func (tr *TaskRunner) Run(task SGCollectTask) {
+ tex := TaskEx(task)
+
+ skipReason, skip := ShouldSkip(task)
+ if skip {
+ log.Printf("SKIP %s [%s] - %s", task.Name(), task.Header(), skipReason)
+ return
+ }
+
+ outputFile := tex.outputFile
+ if outputFile == "" {
+ outputFile = defaultOutputFile
+ }
+ fd, ok := tr.files[outputFile]
+ if !ok {
+ var err error
+ fd, err = tr.createFile(outputFile)
+ if err != nil {
+			log.Printf("FAIL %s [%s] - failed to create file: %v", task.Name(), task.Header(), err)
+ return
+ }
+ tr.files[outputFile] = fd
+ }
+
+ if header := task.Header(); header != "" {
+ err := tr.writeHeader(fd, task)
+ if err != nil {
+			log.Printf("FAIL %s [%s] - failed to write header: %v", task.Name(), task.Header(), err)
+ return
+ }
+ }
+
+ res := ExecuteTask(tex, tr.opts, fd, func(s string) {
+ log.Println(s)
+ }, false)
+ if res.Error != nil {
+ log.Printf("FAIL %s [%s] - %v", task.Name(), task.Header(), res.Error)
+ _, _ = fmt.Fprintf(fd, "%s", res.Error)
+ }
+
+ _, err := fd.WriteString("\n")
+ if err != nil {
+ log.Printf("WARN %s [%s] - failed to write closing newline: %v", task.Name(), task.Header(), err)
+ }
+}
+
+type TaskExecutionResult struct {
+ Task SGCollectTask
+ SkipReason string
+ Error error
+}
+
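+// ShouldSkip reports whether the given task is not applicable to this host
+// (wrong platform, or insufficient privileges), with a human-readable reason.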
+func ShouldSkip(task SGCollectTask) (string, bool) {
+ tex := TaskEx(task)
+ if !tex.ShouldRun(runtime.GOOS) {
+ return fmt.Sprintf("not executing on platform %s", runtime.GOOS), true
+ }
+ if tex.RequiresRoot() {
+ uid := os.Getuid()
+ if uid != -1 && uid != 0 {
+ return "requires root privileges", true
+ }
+ }
+ return "", false
+}
+
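+// ExecuteTask runs the given task, honouring its sample count, interval, and
+// timeout, and recovering from panics. If failFast is true, it returns after
+// the first failing sample rather than continuing.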
+func ExecuteTask(task SGCollectTask, opts *SGCollectOptions, output io.Writer, log func(string), failFast bool) TaskExecutionResult {
+ tex := TaskEx(task)
+ if reason, ok := ShouldSkip(task); ok {
+ return TaskExecutionResult{
+ Task: task,
+ SkipReason: reason,
+ }
+ }
+
+ run := func() (err error) {
+ defer func() {
+ if panicked := recover(); panicked != nil {
+ if recErr, ok := panicked.(error); ok {
+ err = recErr
+ } else {
+ err = fmt.Errorf("task panic: %v", panicked)
+ }
+ }
+ }()
+ ctx := context.Background()
+ if to := tex.Timeout(); to > 0 {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithTimeout(ctx, to)
+ defer cancel()
+ }
+ log(fmt.Sprintf("RUN %s [%s]", task.Name(), task.Header()))
+ return task.Run(ctx, opts, output)
+ }
+
+ var err error
+ if tex.NumSamples() > 0 {
+ for i := 0; i < tex.NumSamples(); i++ {
+ err = run()
+ if err != nil && failFast {
+ return TaskExecutionResult{
+ Task: task,
+ Error: err,
+ }
+ }
+ if i != tex.NumSamples()-1 {
+				log(fmt.Sprintf("Taking sample %d of %q [%s] after %v", i+2, task.Name(), task.Header(), tex.Interval()))
+ time.Sleep(tex.Interval())
+ }
+ }
+ } else {
+ err = run()
+ }
+ return TaskExecutionResult{
+ Task: task,
+ Error: err,
+ }
+}
diff --git a/tools/sgcollect/task_types.go b/tools/sgcollect/task_types.go
new file mode 100644
index 0000000000..edb1c7bf0c
--- /dev/null
+++ b/tools/sgcollect/task_types.go
@@ -0,0 +1,449 @@
+package main
+
+import (
+ "bytes"
+ "compress/gzip"
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "os/exec"
+ "sync"
+ "time"
+)
+
+// SGCollectTask is the base implementation of a task.
+type SGCollectTask interface {
+ Name() string
+ Header() string
+ Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error
+}
+
+// SGCollectTaskEx adds metadata to a task, such as the platforms it applies to.
+// Do not create one directly; instead use one of the helpers (such as Privileged or Sample), which take care
+// of setting the properties on the task if it is already a SGCollectTaskEx.
+type SGCollectTaskEx struct {
+ SGCollectTask
+ platforms []string
+ root bool
+ samples int
+ interval time.Duration
+ timeout time.Duration
+ outputFile string
+ noHeader bool
+ mayFailTest bool
+ removePasswords bool
+}
+
+// TaskEx wraps the given SGCollectTask in a SGCollectTaskEx, or returns it if it is already a SGCollectTaskEx.
+func TaskEx(t SGCollectTask) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ }
+}
+
+func (e SGCollectTaskEx) ShouldRun(platform string) bool {
+ if len(e.platforms) == 0 {
+ return true
+ }
+ for _, plat := range e.platforms {
+ if plat == platform {
+ return true
+ }
+ }
+ return false
+}
+
+func (e SGCollectTaskEx) RequiresRoot() bool {
+ return e.root
+}
+
+func (e SGCollectTaskEx) NumSamples() int {
+ return e.samples
+}
+
+func (e SGCollectTaskEx) Interval() time.Duration {
+ return e.interval
+}
+
+func (e SGCollectTaskEx) Timeout() time.Duration {
+ return e.timeout
+}
+
+func (e SGCollectTaskEx) Header() string {
+ if e.noHeader {
+ return ""
+ }
+ return e.SGCollectTask.Header()
+}
+
+func (e SGCollectTaskEx) Run(ctx context.Context, opts *SGCollectOptions, output io.Writer) error {
+ if !e.removePasswords {
+ return e.SGCollectTask.Run(ctx, opts, output)
+ }
+
+ var buf bytes.Buffer
+ err := e.SGCollectTask.Run(ctx, opts, &buf)
+ if err != nil {
+ return err
+ }
+
+ var jsonVal map[string]any
+ err = json.Unmarshal(buf.Bytes(), &jsonVal) // using stdlib json to avoid importing base and bloating the binary
+ if err != nil {
+ log.Printf("WARN %s [%s] - could not run password remover because the task produced invalid JSON. Check the output for password leaks.", e.Name(), e.Header())
+ _, err = buf.WriteTo(output)
+ return err
+ }
+
+ err = RemovePasswordsAndTagUserData(jsonVal)
+ if err != nil {
+ log.Printf("WARN %s [%s] - could not run password remover: %v. Check the output for password leaks.", e.Name(), e.Header(), err)
+ _, err = buf.WriteTo(output)
+ return err
+ }
+
+ // use a custom json.Encoder to avoid escaping angle brackets
+ encoder := json.NewEncoder(output)
+ encoder.SetEscapeHTML(false)
+ encoder.SetIndent("", "\t")
+ err = encoder.Encode(&jsonVal)
+ return err
+}
+
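+// Sample configures the task to be executed the given number of times, waiting
+// interval between runs (see ExecuteTask).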
+func Sample(t SGCollectTask, samples int, interval time.Duration) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.samples = samples
+ ex.interval = interval
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ samples: samples,
+ interval: interval,
+ }
+}
+
+func Timeout(t SGCollectTask, timeout time.Duration) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.timeout = timeout
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ timeout: timeout,
+ }
+}
+
+func Privileged(t SGCollectTask) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.root = true
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ root: true,
+ }
+}
+
+func OverrideOutput(t SGCollectTask, out string) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.outputFile = out
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ outputFile: out,
+ }
+}
+
+func NoHeader(t SGCollectTask) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.noHeader = true
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ noHeader: true,
+ }
+}
+
+// MayFail marks this task as possibly failing in tests. This has no effect at runtime.
+func MayFail(t SGCollectTask) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.mayFailTest = true
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ mayFailTest: true,
+ }
+}
+
+func RemovePasswords(t SGCollectTask) SGCollectTaskEx {
+ if ex, ok := t.(SGCollectTaskEx); ok {
+ ex.removePasswords = true
+ return ex
+ }
+ return SGCollectTaskEx{
+ SGCollectTask: t,
+ removePasswords: true,
+ }
+}
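+
+// The helpers above (Sample, Timeout, Privileged, OverrideOutput, NoHeader,
+// MayFail, RemovePasswords) can be chained freely, because each one mutates an
+// existing SGCollectTaskEx rather than re-wrapping it. A hypothetical
+// combination (illustrative only):
+//
+//	Privileged(Timeout(OSTask("linux", "Kernel log buffer", "dmesg"), 30*time.Second))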
+
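+// URLTask GETs a URL on the Sync Gateway admin API, using the credentials from
+// the options, and writes the response body to the output. JSON responses are
+// pretty-printed.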
+type URLTask struct {
+ name string
+ url string
+ timeout *time.Duration
+}
+
+func (c *URLTask) Name() string {
+ return c.name
+}
+
+func (c *URLTask) Header() string {
+ return c.url
+}
+
+func (c *URLTask) Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error {
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.url, nil)
+ if err != nil {
+ return fmt.Errorf("failed to build HTTP request: %w", err)
+ }
+ req.SetBasicAuth(opts.SyncGatewayUsername, string(opts.SyncGatewayPassword))
+
+ client := *getHTTPClient(opts)
+ if c.timeout != nil {
+ client.Timeout = *c.timeout
+ }
+ res, err := client.Do(req)
+ if err != nil {
+		return fmt.Errorf("failed to perform request: %w", err)
+ }
+ defer res.Body.Close()
+
+ // If the result is JSON, pretty-print it
+ if res.Header.Get("Content-Type") == "application/json" {
+ body, err := io.ReadAll(res.Body)
+ if err != nil {
+ return fmt.Errorf("failed to load response: %w", err)
+ }
+ var buf bytes.Buffer
+ err = json.Indent(&buf, body, "", "\t")
+ if err != nil {
+ log.Printf("WARN %s [%s] - failed to pretty-print JSON: %v", c.Name(), c.Header(), err)
+ _, err = out.Write(body)
+ return err
+ }
+ _, err = buf.WriteTo(out)
+ return err
+ }
+ _, err = io.Copy(out, res.Body)
+ if err != nil {
+ return fmt.Errorf("failed to load response: %w", err)
+ }
+ return nil
+}
+
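+// FileTask copies the contents of a single file into the output verbatim.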
+type FileTask struct {
+ name string
+ inputFile string
+}
+
+func (f *FileTask) Name() string {
+ return f.name
+}
+
+func (f *FileTask) Header() string {
+ return f.inputFile
+}
+
+func (f *FileTask) Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error {
+ fd, err := os.Open(f.inputFile)
+ if err != nil {
+ return fmt.Errorf("failed to open %q: %w", f.inputFile, err)
+ }
+ defer fd.Close()
+
+ _, err = io.Copy(out, fd)
+ if err != nil {
+ return fmt.Errorf("failed to copy contents of %q: %w", f.inputFile, err)
+ }
+ return nil
+}
+
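+// GZipFileTask copies the contents of a gzipped file into the output,
+// decompressing it on the fly.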
+type GZipFileTask struct {
+ name string
+ inputFile string
+}
+
+func (f *GZipFileTask) Name() string {
+ return f.name
+}
+
+func (f *GZipFileTask) Header() string {
+ return f.inputFile
+}
+
+func (f *GZipFileTask) Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error {
+ fd, err := os.Open(f.inputFile)
+ if err != nil {
+ return fmt.Errorf("failed to open %q: %w", f.inputFile, err)
+ }
+ defer fd.Close()
+
+ unzipper, err := gzip.NewReader(fd)
+ if err != nil {
+ return fmt.Errorf("failed to decompress %q: %w", f.inputFile, err)
+ }
+ defer unzipper.Close()
+
+ _, err = io.Copy(out, unzipper) //nolint:gosec - we're copying our own files
+ if err != nil {
+ return fmt.Errorf("failed to copy contents of %q: %w", f.inputFile, err)
+ }
+ return nil
+}
+
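+// OSCommandTask runs an arbitrary command via `sh -c` and captures its stdout
+// and stderr in the output.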
+type OSCommandTask struct {
+ name string
+ command string
+ outputFile string
+}
+
+func (o *OSCommandTask) Name() string {
+ return o.name
+}
+
+func (o *OSCommandTask) Header() string {
+ return o.command
+}
+
+func (o *OSCommandTask) Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error {
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithCancel(ctx)
+ defer cancel()
+ cmd := exec.Command("sh", "-c", o.command) //nolint:gosec
+ stdout, err := cmd.StdoutPipe()
+ if err != nil {
+ return fmt.Errorf("stdout pipe: %w", err)
+ }
+ stderr, err := cmd.StderrPipe()
+ if err != nil {
+ return fmt.Errorf("stderr pipe: %w", err)
+ }
+ err = cmd.Start()
+ if err != nil {
+ return fmt.Errorf("failed to start: %w", err)
+ }
+
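+	// Four goroutines: two to copy stdout and stderr into the output, one to
+	// wait for the command to exit, and one to kill the process if the context
+	// is cancelled (for example, by a task timeout).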
+ wg := sync.WaitGroup{}
+ wg.Add(4)
+ go func() {
+ defer wg.Done()
+ _, err := io.Copy(out, stdout)
+ if err != nil {
+ log.Printf("WARN %s [%s]: stdout copier error %v", o.name, o.Header(), err)
+ }
+ }()
+ go func() {
+ defer wg.Done()
+ _, err := io.Copy(out, stderr)
+ if err != nil {
+ log.Printf("WARN %s [%s]: stderr copier error %v", o.name, o.Header(), err)
+ }
+ }()
+ go func() {
+ defer wg.Done()
+ err = cmd.Wait()
+ cancel() // to release the below goroutine
+ }()
+ go func() {
+ defer wg.Done()
+ <-ctx.Done()
+ if cmd.Process != nil {
+ _ = cmd.Process.Kill()
+ }
+ }()
+ wg.Wait()
+ return err
+}
+
+type osTask struct {
+ OSCommandTask
+ platforms []string
+}
+
+func (o *osTask) ShouldRun(platform string) bool {
+ for _, p := range o.platforms {
+ if p == platform {
+ return true
+ }
+ }
+ return false
+}
+
+// OSTask is a helper to return a task for the given platform, which can be one of
+// "unix", "linux", "windows", "darwin", "solaris", or "" for all platforms.
+func OSTask(platform string, name, cmd string) SGCollectTask {
+ switch platform {
+ case "":
+ return &OSCommandTask{name, cmd, ""}
+ case "linux", "windows", "darwin":
+ return SGCollectTaskEx{
+ SGCollectTask: &OSCommandTask{name, cmd, ""},
+ platforms: []string{platform},
+ }
+ case "unix":
+ return SGCollectTaskEx{
+ SGCollectTask: &OSCommandTask{name, cmd, ""},
+ platforms: []string{"linux", "darwin", "freebsd", "netbsd", "openbsd"},
+ }
+ case "solaris":
+ return SGCollectTaskEx{
+ SGCollectTask: &OSCommandTask{name, cmd, ""},
+ platforms: []string{"illumos", "solaris"},
+ }
+ default:
+ panic(fmt.Sprintf("unknown platform %s", platform))
+ }
+}
+
+type SGCollectOptionsTask struct{}
+
+func (s SGCollectOptionsTask) Name() string {
+ return "sgcollect_info options"
+}
+
+func (s SGCollectOptionsTask) Header() string {
+ return ""
+}
+
+func (s SGCollectOptionsTask) Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error {
+ _, err := fmt.Fprintf(out, "%#v\n", opts)
+ return err
+}
+
+type RawStringTask struct {
+ name string
+ val string
+}
+
+func (s RawStringTask) Name() string {
+ return s.name
+}
+
+func (s RawStringTask) Header() string {
+ return ""
+}
+
+func (s RawStringTask) Run(ctx context.Context, opts *SGCollectOptions, out io.Writer) error {
+ _, err := fmt.Fprintf(out, "%s\n", s.val)
+ return err
+}
diff --git a/tools/sgcollect/task_types_test.go b/tools/sgcollect/task_types_test.go
new file mode 100644
index 0000000000..92a1046050
--- /dev/null
+++ b/tools/sgcollect/task_types_test.go
@@ -0,0 +1,64 @@
+package main
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestFileTask(t *testing.T) {
+ tmpdir := t.TempDir()
+ const testFileName = "testFile"
+ testFilePath := filepath.Join(tmpdir, testFileName)
+	require.NoError(t, os.WriteFile(testFilePath, []byte("test data"), 0600))
+
+ tt := NewTaskTester(t, SGCollectOptions{})
+ task := &FileTask{
+ name: "Test",
+ inputFile: testFilePath,
+ }
+ buf, res := tt.RunTask(task)
+ AssertRan(t, res)
+	assert.Equal(t, "test data", buf.String())
+}
+
+func TestURLTask(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/plain")
+ w.WriteHeader(http.StatusOK)
+ _, _ = w.Write([]byte(`{"foo": "bar"}`))
+ }))
+ defer server.Close()
+
+ tt := NewTaskTester(t, SGCollectOptions{})
+ task := &URLTask{
+ name: "Test",
+ url: server.URL + "/",
+ }
+ buf, res := tt.RunTask(task)
+ AssertRan(t, res)
+	assert.Equal(t, `{"foo": "bar"}`, buf.String())
+}
+
+func TestURLTaskJSONPrettified(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(http.StatusOK)
+ _, _ = w.Write([]byte(`{"foo": "bar"}`))
+ }))
+ defer server.Close()
+
+ tt := NewTaskTester(t, SGCollectOptions{})
+ task := &URLTask{
+ name: "Test",
+ url: server.URL + "/",
+ }
+ buf, res := tt.RunTask(task)
+ AssertRan(t, res)
+	assert.Equal(t, "{\n\t\"foo\": \"bar\"\n}", buf.String())
+}
diff --git a/tools/sgcollect/tasks.go b/tools/sgcollect/tasks.go
new file mode 100644
index 0000000000..419340e59d
--- /dev/null
+++ b/tools/sgcollect/tasks.go
@@ -0,0 +1,242 @@
+package main
+
+import (
+ "errors"
+ "fmt"
+ "log"
+ "net/url"
+ "os"
+ "path/filepath"
+ "time"
+)
+
+func makeOSTasks() []SGCollectTask {
+ return []SGCollectTask{
+ OSTask("unix", "uname", "uname -a"),
+ OSTask("unix", "time and TZ", "date; date -u"),
+ Timeout(OSTask("unix", "ntp time", "ntpdate -q pool.ntp.org || nc time.nist.gov 13 || netcat time.nist.gov 13"), 60*time.Second),
+ MayFail(OSTask("unix", "ntp peers", "ntpq -p")), // ntpq is not present on macOS Mojave and above
+ MayFail(OSTask("unix", "raw /etc/sysconfig/clock", "cat /etc/sysconfig/clock")), // /etc/sysconfig/clock may not be present on macOS
+		MayFail(OSTask("unix", "raw /etc/timezone", "cat /etc/timezone")),                     // /etc/timezone may not be present on macOS
+ OSTask("windows", "System information", "systeminfo"),
+ OSTask("windows", "Computer system", "wmic computersystem"),
+ OSTask("windows", "Computer OS", "wmic os"),
+ OSTask("linux", "System Hardware", "lshw -json || lshw"),
+ OSTask("solaris", "Process list snapshot", "prstat -a -c -n 100 -t -v -L 1 10"),
+ OSTask("solaris", "Process list", "ps -ef"),
+ OSTask("solaris", "Service configuration", "svcs -a"),
+ OSTask("solaris", "Swap configuration", "swap -l"),
+ OSTask("solaris", "Disk activity", "zpool iostat 1 10"),
+ OSTask("solaris", "Disk activity", "iostat -E 1 10"),
+ OSTask("linux", "Process list snapshot", "export TERM=''; top -Hb -n1 || top -H n1"),
+ OSTask("linux", "Process list", "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,maj_flt,min_flt,pri,nice,vsize,rss,tty,stat,wchan:12,start,bsdtime,command"),
+ OSTask("linux", "Raw /proc/vmstat", "cat /proc/vmstat"),
+ OSTask("linux", "Raw /proc/mounts", "cat /proc/mounts"),
+ OSTask("linux", "Raw /proc/partitions", "cat /proc/partitions"),
+ Sample(OSTask("linux", "Raw /proc/diskstats", "cat /proc/diskstats"), 10, time.Second),
+ OSTask("linux", "Raw /proc/interrupts", "cat /proc/interrupts"),
+ OSTask("linux", "Swap configuration", "free -t"),
+ OSTask("linux", "Swap configuration", "swapon -s"),
+ OSTask("linux", "Kernel modules", "lsmod"),
+ OSTask("linux", "Distro version", "cat /etc/redhat-release"),
+ OSTask("linux", "Distro version", "lsb_release -a"),
+ OSTask("linux", "Distro version", "cat /etc/SuSE-release"),
+ OSTask("linux", "Distro version", "cat /etc/issue"),
+ OSTask("linux", "Installed software", "rpm -qa"),
+ OSTask("linux", "Installed software", "COLUMNS=300 dpkg -l"),
+ OSTask("linux", "Extended iostat", "iostat -x -p ALL 1 10 || iostat -x 1 10"),
+ OSTask("linux", "Core dump settings", "find /proc/sys/kernel -type f -name '*core*' -print -exec cat '{}' ';'"),
+ OSTask("unix", "sysctl settings", "sysctl -a"),
+ OSTask("linux", "lsof output", "echo sync_gateway | xargs -n1 pgrep | xargs -n1 -r -- lsof -n -p"),
+ OSTask("linux", "LVM info", "lvdisplay"),
+ OSTask("linux", "LVM info", "vgdisplay"),
+ OSTask("linux", "LVM info", "pvdisplay"),
+ OSTask("darwin", "Process list snapshot", "top -l 1"),
+ OSTask("darwin", "Disk activity", "iostat 1 10"),
+ OSTask("darwin", "Process list", "ps -Aww -o user,pid,ppid,pcpu,pri,nice,vsize,rss,tty,stat,start,command"),
+ OSTask("windows", "Installed software", "wmic product get name, version"),
+ OSTask("windows", "Service list", "wmic service where state=\"running\" GET caption, name, state"),
+ OSTask("windows", "Process list", "wmic process"),
+ OSTask("windows", "Process usage", "tasklist /V /fo list"),
+ OSTask("windows", "Swap settings", "wmic pagefile"),
+ OSTask("windows", "Disk partition", "wmic partition"),
+ OSTask("windows", "Disk volumes", "wmic volume"),
+ Sample(OSTask("unix", "Network configuration", "ifconfig -a"), 2, 10*time.Second),
+ OSTask("linux", "Network configuration", "echo link addr neigh rule route netns | xargs -n1 -- sh -x -c 'ip $1 list' --"),
+ Sample(OSTask("windows", "Network configuration", "ipconfig /all"), 2, 10*time.Second),
+ OSTask("linux", "Raw /proc/net/dev", "cat /proc/net/dev"),
+ OSTask("linux", "Network link statistics", "ip -s link"),
+ OSTask("unix", "Network status", "netstat -anp || netstat -an"),
+ OSTask("windows", "Network status", "netstat -ano"),
+ OSTask("unix", "Network routing table", "netstat -rn"),
+ OSTask("linux", "Network socket statistics", "ss -an"),
+ OSTask("linux", "Extended socket statistics", "ss -an --info --processes"),
+ OSTask("unix", "Arp cache", "arp -na"),
+ OSTask("linux", "Iptables dump", "iptables-save"),
+ OSTask("unix", "Raw /etc/hosts", "cat /etc/hosts"),
+ OSTask("unix", "Raw /etc/resolv.conf", "cat /etc/resolv.conf"),
+ OSTask("linux", "Raw /etc/nsswitch.conf", "cat /etc/nsswitch.conf"),
+ OSTask("windows", "Arp cache", "arp -a"),
+ OSTask("windows", "Network Interface Controller", "wmic nic"),
+ OSTask("windows", "Network Adapter", "wmic nicconfig"),
+ OSTask("windows", "Active network connection", "wmic netuse"),
+ OSTask("windows", "Protocols", "wmic netprotocol"),
+ OSTask("windows", "Hosts file", `type %SystemRoot%\system32\drivers\etc\hosts`),
+ OSTask("windows", "Cache memory", "wmic memcache"),
+ OSTask("windows", "Physical memory", "wmic memphysical"),
+ OSTask("windows", "Physical memory chip info", "wmic memorychip"),
+ OSTask("windows", "Local storage devices", "wmic logicaldisk"),
+ OSTask("unix", "Filesystem", "df -ha"),
+ MayFail(OSTask("unix", "System activity reporter", "sar 1 10")), // sar is not always installed
+ MayFail(OSTask("unix", "System paging activity", "vmstat 1 10")), // vmstat is not always installed
+ OSTask("unix", "System uptime", "uptime"),
+ MayFail(OSTask("unix", "couchbase user definition", "getent passwd couchbase")), // might not be present in tests
+ Privileged(OSTask("unix", "couchbase user limits", `su couchbase -c "ulimit -a"`)),
+ MayFail(OSTask("unix", "sync_gateway user definition", "getent passwd sync_gateway")), // might not be present in tests
+ Privileged(OSTask("unix", "sync_gateway user limits", `su sync_gateway -c "ulimit -a"`)),
+ OSTask("linux", "Interrupt status", "intrstat 1 10"),
+ OSTask("linux", "Processor status", "mpstat 1 10"),
+ OSTask("solaris", "System log", "cat /var/adm/messages"),
+ OSTask("linux", "Raw /proc/uptime", "cat /proc/uptime"),
+ NoHeader(OverrideOutput(OSTask("linux", "Systemd journal", "journalctl 2>&1 | gzip -c"), "systemd_journal.gz")),
+ NoHeader(OverrideOutput(OSTask("linux", "All logs", "tar cz /var/log/syslog* /var/log/dmesg /var/log/messages* /var/log/daemon* /var/log/debug* /var/log/kern.log* 2>/dev/null"), "syslog.tar.gz")),
+ OSTask("linux", "Relevant proc data", "echo sync_gateway | xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; cat /proc/$1/status; cat /proc/$1/limits; cat /proc/$1/smaps; cat /proc/$1/numa_maps; cat /proc/$1/task/*/sched; echo' --"),
+ OSTask("linux", "Processes' environment", "echo sync_gateway | xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; ( cat /proc/$1/environ | tr \\0 \\n ); echo' --"),
+ OSTask("linux", "NUMA data", "numactl --hardware"),
+ OSTask("linux", "NUMA data", "numactl --show"),
+ OSTask("linux", "NUMA data", "cat /sys/devices/system/node/node*/numastat"),
+ Privileged(OSTask("unix", "Kernel log buffer", "dmesg -H || dmesg")),
+ OSTask("linux", "Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/enabled"),
+ OSTask("linux", "Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/defrag"),
+ OSTask("linux", "Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/enabled"),
+ OSTask("linux", "Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/defrag"),
+ OSTask("linux", "Network statistics", "netstat -s"),
+ OSTask("linux", "Full raw netstat", "cat /proc/net/netstat"),
+ OSTask("linux", "CPU throttling info", "echo /sys/devices/system/cpu/cpu*/thermal_throttle/* | xargs -n1 -- sh -c 'echo $1; cat $1' --"),
+ }
+}
+
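+// makeCollectLogsTasks returns tasks collecting Sync Gateway log files (both
+// current and rotated) from the default log directories, as well as from the
+// log_file_path in the given config, if set.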
+func makeCollectLogsTasks(opts *SGCollectOptions, config ServerConfig) (result []SGCollectTask) {
+ var sgLogFiles = []string{
+ "sg_error",
+ "sg_warn",
+ "sg_info",
+ "sg_debug",
+ "sg_stats",
+ "sync_gateway_access",
+ "sync_gateway_error",
+ }
+ const sgLogExtensionNotRotated = ".log"
+ const sgLogExtensionRotated = ".log.gz"
+ var sgLogDirectories = []string{
+ "/home/sync_gateway/logs",
+ "/var/log/sync_gateway",
+ "/Users/sync_gateway/logs",
+ `C:\Program Files (x86)\Couchbase\var\lib\couchbase\logs`,
+ `C:\Program Files\Couchbase\var\lib\couchbase\logs`,
+ `C:\Program Files\Couchbase\Sync Gateway\var\lib\couchbase\logs`,
+ }
+
+ // Also try getting the current path from the config, in case it's not one of the defaults
+ if cfgPath := config.Logging.LogFilePath; cfgPath != "" {
+ // This could be a relative path
+ if !filepath.IsAbs(cfgPath) {
+ cfgPath = filepath.Join(opts.RootDir, cfgPath)
+ }
+		sgLogDirectories = append(sgLogDirectories, cfgPath)
+ }
+
+ // Check every combination of directory/file, grab everything we can
+ for _, dir := range sgLogDirectories {
+ // Bail out if the directory doesn't exist, avoids unnecessary checks
+ _, err := os.Stat(dir)
+ if err != nil {
+ if !errors.Is(err, os.ErrNotExist) {
+ log.Printf("WARN: failed to stat %q: %v", dir, err)
+ }
+ continue
+ }
+ for _, file := range sgLogFiles {
+			// Grab the rotated files first so that they'll be in the right order when ungzipped
+ rotated, err := filepath.Glob(filepath.Join(dir, fmt.Sprintf("%s-*%s", file, sgLogExtensionRotated)))
+ if err != nil {
+ log.Printf("WARN: failed to glob %s in %s: %v", file, dir, err)
+ } else {
+ for _, rotatedFile := range rotated {
+ log.Printf("Collecting rotated log file %s", rotatedFile)
+ result = append(result, OverrideOutput(&GZipFileTask{
+ name: file + sgLogExtensionNotRotated,
+ inputFile: rotatedFile,
+ }, file+sgLogExtensionNotRotated))
+ }
+ }
+ log.Printf("Collecting non-rotated log file %s", filepath.Join(dir, file+sgLogExtensionNotRotated))
+ result = append(result, OverrideOutput(&FileTask{
+ name: file + sgLogExtensionNotRotated,
+ inputFile: filepath.Join(dir, file+sgLogExtensionNotRotated),
+ }, file+sgLogExtensionNotRotated))
+ }
+ }
+ return result
+}
+
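+// makeSGTasks returns the tasks that collect from a running Sync Gateway node:
+// its binary, bootstrap config, expvars, server and per-database configs,
+// pprof profiles, and log files.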
+func makeSGTasks(url *url.URL, opts *SGCollectOptions, config ServerConfig) (result []SGCollectTask) {
+ binary, bootstrapConfigPath := findSGBinaryAndConfigs(url, opts)
+ if binary != "" {
+ result = append(result, OverrideOutput(NoHeader(&FileTask{
+ name: "Sync Gateway executable",
+ inputFile: binary,
+ }), "sync_gateway"))
+ }
+ if bootstrapConfigPath != "" {
+ result = append(result, RemovePasswords(OverrideOutput(NoHeader(&FileTask{
+			name:      "Sync Gateway bootstrap config",
+ inputFile: bootstrapConfigPath,
+ }), "sync_gateway.json")))
+ }
+
+ result = append(result, OverrideOutput(NoHeader(&URLTask{
+ name: "Sync Gateway expvars",
+ url: url.String() + "/_expvar",
+ }), "expvars.json"), RemovePasswords(&URLTask{
+ name: "Collect server config",
+ url: url.String() + "/_config",
+ }), RemovePasswords(&URLTask{
+ name: "Collect runtime config",
+ url: url.String() + "/_config?include_runtime=true",
+ }), RemovePasswords(&URLTask{
+ name: "Collect server status",
+ url: url.String() + "/_status",
+ }))
+ if len(config.Databases) > 0 {
+ for db := range config.Databases {
+ result = append(result, RemovePasswords(&URLTask{
+ name: fmt.Sprintf("Database config - %q", db),
+ url: url.String() + fmt.Sprintf("/%s/_config?include_runtime=true", db),
+ }))
+ }
+ }
+ for _, profile := range [...]string{"profile", "heap", "goroutine", "block", "mutex"} {
+ result = append(result, OverrideOutput(NoHeader(&URLTask{
+ name: fmt.Sprintf("Collect %s pprof", profile),
+ url: url.String() + fmt.Sprintf("/_debug/pprof/%s", profile),
+ // Override timeout for pprof requests as they can take a bit longer
+ timeout: durationPtr(time.Minute),
+ }), fmt.Sprintf("pprof_%s.pb.gz", profile)))
+ }
+ result = append(result, makeCollectLogsTasks(opts, config)...)
+ return
+}
+
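+// MakeAllTasks returns every task sgcollect_info runs: OS-level diagnostics
+// followed by the Sync Gateway-specific tasks. A minimal sketch of the
+// intended use, assuming a TaskRunner set up as in the tests:
+//
+//	tr, _ := NewTaskRunner(opts)
+//	for _, task := range MakeAllTasks(adminURL, opts, config) {
+//		tr.Run(task)
+//	}
+//	tr.Finalize()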
+func MakeAllTasks(url *url.URL, opts *SGCollectOptions, config ServerConfig) []SGCollectTask {
+ result := []SGCollectTask{
+ new(SGCollectOptionsTask),
+ }
+ result = append(result, makeOSTasks()...)
+ result = append(result, makeSGTasks(url, opts, config)...)
+ return result
+}
+
+func durationPtr(d time.Duration) *time.Duration {
+ return &d
+}
diff --git a/tools/sgcollect/types.go b/tools/sgcollect/types.go
new file mode 100644
index 0000000000..54bad334b5
--- /dev/null
+++ b/tools/sgcollect/types.go
@@ -0,0 +1,9 @@
+package main
+
+// ServerConfig is a subset of rest.RunTimeServerConfigResponse, copied here to keep the sgcollect binary size down.
+type ServerConfig struct {
+ Logging struct {
+ LogFilePath string `json:"log_file_path,omitempty"`
+ } `json:"logging,omitempty"`
+ Databases map[string]any `json:"databases"`
+}
diff --git a/tools/sgcollect/utilities_testing.go b/tools/sgcollect/utilities_testing.go
new file mode 100644
index 0000000000..686258ab63
--- /dev/null
+++ b/tools/sgcollect/utilities_testing.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+type TaskTester struct {
+ t *testing.T
+ opts *SGCollectOptions
+}
+
+func (tt *TaskTester) RunTask(task SGCollectTask) (*bytes.Buffer, TaskExecutionResult) {
+ var buf bytes.Buffer
+ res := ExecuteTask(task, tt.opts, &buf, func(s string) {
+ tt.t.Log(s)
+ }, true)
+ return &buf, res
+}
+
+func AssertDidNotFail(t *testing.T, ter TaskExecutionResult) bool {
+ return assert.NoError(t, ter.Error, fmt.Sprintf("Task %s [%s] errored", ter.Task.Name(), ter.Task.Header()))
+}
+
+func AssertRan(t *testing.T, ter TaskExecutionResult) bool {
+ if ter.SkipReason != "" {
+ assert.Failf(t, fmt.Sprintf("Task %s [%s] skipped", ter.Task.Name(), ter.Task.Header()), ter.SkipReason)
+ return false
+ }
+ return AssertDidNotFail(t, ter)
+}
+
+func NewTaskTester(t *testing.T, optOverrides SGCollectOptions) *TaskTester {
+ return &TaskTester{
+ t: t,
+ opts: &optOverrides,
+ }
+}
diff --git a/tools/sgcollect_info b/tools/sgcollect_info
deleted file mode 100755
index 6e5e358f68..0000000000
--- a/tools/sgcollect_info
+++ /dev/null
@@ -1,807 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Copyright 2016-Present Couchbase, Inc.
-
-Use of this software is governed by the Business Source License included in
-the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
-file, in accordance with the Business Source License, use of this software will
-be governed by the Apache License, Version 2.0, included in the file
-licenses/APL2.txt.
-"""
-
-# -*- python -*-
-import base64
-import glob
-import json
-import optparse
-import os
-import platform
-import re
-import ssl
-import subprocess
-import sys
-import urllib.error
-import urllib.parse
-import urllib.request
-import uuid
-from sys import platform as _platform
-
-import password_remover
-from tasks import AllOsTask
-from tasks import CbcollectInfoOptions
-from tasks import TaskRunner
-from tasks import add_file_task
-from tasks import add_gzip_file_task
-from tasks import do_upload_and_exit
-from tasks import dump_utilities
-from tasks import flatten
-from tasks import generate_upload_url
-from tasks import log
-from tasks import make_curl_task
-from tasks import make_os_tasks
-from tasks import setup_stdin_watcher
-
-try:
- # Don't validate HTTPS by default.
- _create_unverified_https_context = ssl._create_unverified_context
-except AttributeError:
- # Running an older version of Python which won't validate HTTPS anyway.
- pass
-else:
- ssl._create_default_https_context = _create_unverified_https_context
-
-# Collects the following info from Sync Gateway
-#
-# - System Stats (top, netstat, etc)
-# - Sync Gateway logs
-# - Expvar Json
-# - pprof files (profiling / memory)
-# - Startup and running SG config
-#
-# See https://github.com/couchbase/sync_gateway/issues/1640
-#
-# Python version compatibility:
-#
-# Until the libc / centos6 issues are resolved, this should remain python 2.6 compatible.
-# One common incompatibility is the formatting syntax, as discussed here: http://bit.ly/2rIH8wg
-USAGE = """usage: %prog [options] output_file.zip
-
-- Linux/Windows/OSX:
- %prog output_file.zip
- %prog -v output_file.zip"""
-
-mydir = os.path.dirname(sys.argv[0])
-
-
-def create_option_parser():
- parser = optparse.OptionParser(usage=USAGE, option_class=CbcollectInfoOptions)
- parser.add_option("-r", dest="root",
- help="root directory - defaults to %s" % (mydir + "/.."),
- default=os.path.abspath(os.path.join(mydir, "..")))
- parser.add_option("-v", dest="verbosity", help="increase verbosity level",
- action="count", default=0)
- parser.add_option("-p", dest="product_only", help="gather only product related information",
- action="store_true", default=False)
- parser.add_option("-d", action="callback", callback=dump_utilities,
- help="dump a list of commands that sgcollect_info needs")
- parser.add_option("--watch-stdin", dest="watch_stdin",
- action="store_true", default=False,
- help=optparse.SUPPRESS_HELP)
- parser.add_option("--log-redaction-level", dest="redact_level",
- default="none",
- help="redaction level for the logs collected, none and partial supported (default is none)")
- parser.add_option("--log-redaction-salt", dest="salt_value",
- default=str(uuid.uuid4()),
- help="Is used to salt the hashing of tagged data, \
- defaults to random uuid. If input by user it should \
- be provided along with --log-redaction-level option")
- parser.add_option("--just-upload-into", dest="just_upload_into",
- help=optparse.SUPPRESS_HELP)
- parser.add_option("--upload-host", dest="upload_host",
- help="if specified, gathers diagnostics and uploads it to the specified host,"
- " e.g 'https://uploads.couchbase.com'")
- parser.add_option("--customer", dest="upload_customer",
- help="used in conjunction with '--upload-host' and '--ticket', "
- "specifies the customer name for the upload")
- parser.add_option("--ticket", dest="upload_ticket", type='ticket',
- help="used in conjunction with '--upload-host' and '--customer',"
- " specifies the support ticket number for the upload."
- " e.g 1234 (must be numeric), contact Couchbase Support to open a new"
- "ticket if you do not already have one. For more info, see"
- "http://www.couchbase.com/wiki/display/couchbase/Working+with+the+Couchbase+Technical+Support+Team")
- parser.add_option("--sync-gateway-url", dest="sync_gateway_url",
- help="Sync Gateway admin port URL, eg, http://localhost:4985")
- parser.add_option("--sync-gateway-config", dest="sync_gateway_config",
- help="path to Sync Gateway config. By default will try to discover via expvars")
- parser.add_option("--sync-gateway-executable", dest="sync_gateway_executable",
- help="path to Sync Gateway executable. By default will try to discover via expvars")
- parser.add_option("--sync-gateway-username", dest="sync_gateway_username",
- help="Sync Gateway Admin API username ")
- parser.add_option("--sync-gateway-password", dest="sync_gateway_password",
- help="Sync Gateway Admin API password ")
- parser.add_option("--upload-proxy", dest="upload_proxy", default="",
- help="specifies proxy for upload")
- parser.add_option("--tmp-dir", dest="tmp_dir", default=None,
- help="set the temp dir used while processing collected data. Overrides the TMPDIR env variable if set")
- return parser
-
-
-def expvar_url(sg_url):
-
- return '{0}/_expvar'.format(sg_url)
-
-
-def make_http_client_pprof_tasks(sg_url, sg_username, sg_password):
-
- """
- These tasks use the python http client to collect the raw pprof data, which can later
- be rendered into something human readable
- """
- profile_types = [
- "profile",
- "heap",
- "goroutine",
- "block",
- "mutex",
- ]
-
- base_pprof_url = "{0}/_debug/pprof".format(sg_url)
-
- pprof_tasks = []
- for profile_type in profile_types:
- sg_pprof_url = "{0}/{1}".format(base_pprof_url, profile_type)
- clean_task = make_curl_task(name="Collect {0} pprof via http client".format(profile_type),
- user=sg_username,
- password=sg_password,
- url=sg_pprof_url,
- log_file="pprof_{0}.pb.gz".format(profile_type))
- clean_task.no_header = True
- pprof_tasks.append(clean_task)
-
- return pprof_tasks
-
-
-def to_lower_case_keys_dict(original_dict):
- result = {}
- for k, v in list(original_dict.items()):
- result[k.lower()] = v
- return result
-
-
-def extract_element_from_config(element, config):
- """ The config returned from /_config may not be fully formed json
- due to the fact that the sync function is inside backticks (`)
- Therefore this method grabs an element from the config after
- removing the sync function
- """
-
- sync_regex = r'"Sync":(`.*`)'
- config = re.sub(sync_regex, '"Sync":""', config)
- try:
- # convert dictionary keys to lower case
- original_dict = json.loads(config)
- lower_case_keys_dict = to_lower_case_keys_dict(original_dict)
-
- # lookup key after converting element name to lower case
- return lower_case_keys_dict[element.lower()]
- except (ValueError, KeyError):
- # If we can't deserialize the json or find the key then return nothing
- return
-
-
-def extract_element_from_default_logging_config(element, config):
- # extracts a property from nested logging object
- try:
- logging_config = extract_element_from_config('Logging', config)
- if logging_config:
- default_logging_config = extract_element_from_config('default', json.dumps(logging_config))
- if default_logging_config:
- guessed_log_path = extract_element_from_config(element, json.dumps(default_logging_config))
- if guessed_log_path:
- return guessed_log_path
- return
- except (ValueError, KeyError):
- # If we can't deserialize the json or find the key then return nothing
- return
-
-
-def extract_element_from_logging_config(element, config):
- # extracts a property from nested logging object
- try:
- logging_config = extract_element_from_config('logging', config)
- if logging_config:
- guessed_log_path = extract_element_from_config(element, json.dumps(logging_config))
- if guessed_log_path:
- return guessed_log_path
- return
- except (ValueError, KeyError):
- # If we can't deserialize the json or find the key then return nothing
- return
-
-def urlopen_with_basic_auth(url, username, password):
- if username and len(username) > 0:
- # Add basic auth header
- request = urllib.request.Request(url)
- base64string = base64.b64encode(bytes('%s:%s' % (username, password),'utf-8'))
- request.add_header("Authorization", "Basic %s" % base64string.decode('utf-8'))
- return urllib.request.urlopen(request)
- else:
- return urllib.request.urlopen(url)
-
-def make_collect_logs_tasks(zip_dir, sg_url, sg_config_file_path, sg_username, sg_password, salt, should_redact):
-
- sg_log_files = {
- "sg_error.log": "sg_error.log",
- "sg_warn.log": "sg_warn.log",
- "sg_info.log": "sg_info.log",
- "sg_debug.log": "sg_debug.log",
- "sg_stats.log": "sg_stats.log",
- "sync_gateway_access.log": "sync_gateway_access.log",
- "sync_gateway_error.log": "sync_gateway_error.log",
- }
-
- os_home_dirs = [
- "/home/sync_gateway/logs",
- "/var/log/sync_gateway",
- "/Users/sync_gateway/logs", # OSX sync gateway
- R'C:\Program Files (x86)\Couchbase\var\lib\couchbase\logs', # Windows (Pre-2.0)
- R'C:\Program Files\Couchbase\var\lib\couchbase\logs', # Windows (Post-2.0)
- R'C:\Program Files\Couchbase\Sync Gateway\var\lib\couchbase\logs' # Windows (Post-2.1) sync gateway
- ]
- # Try to find user-specified log path
- if sg_url:
- config_url = "{0}/_config".format(sg_url)
- try:
- response = urlopen_with_basic_auth(config_url, sg_username, sg_password)
- except urllib.error.URLError:
- print("Failed to load SG config from running SG.")
- config_str = ""
- else:
- config_str = response.read().decode('utf-8')
- else:
- config_str = ""
-
- # If SG isn't running, load the bootstrap config - it'll have the logging config in it
- if config_str == "" and sg_config_file_path is not None and sg_config_file_path != "":
- try:
- with open(sg_config_file_path) as fd:
- print("Loading SG config from path on disk.")
- config_str = fd.read()
- except Exception as e:
- print("Failed to load SG config from disk: {0}".format(e))
-
- # Find log file path from old style top level config
- guessed_log_path = extract_element_from_config('LogFilePath', config_str)
- if guessed_log_path and os.path.isfile(guessed_log_path):
- # If the specified log path is a file, add filename to standard SG log files list
- # and parent directory path of the file to standard SG log directories list to
- # eventually look for all permutations of SG log files and directories.
- os_home_dirs.append(os.path.dirname(guessed_log_path))
- sg_log_files[os.path.basename(guessed_log_path)] = os.path.basename(guessed_log_path)
- elif guessed_log_path and os.path.isdir(guessed_log_path):
- # If the specified log path is a directory, add that path to the standard
- # SG log directories list to eventually look for the standard SG log files.
- os_home_dirs.append(guessed_log_path)
-
- # Keep a dictionary of log file paths we've added, to avoid adding duplicates
- sg_log_file_paths = {}
-
- sg_tasks = []
-
- def lookup_std_log_files(files, dirs):
- for dir in dirs:
- for file in files:
- name, ext = os.path.splitext(file)
- # Collect active and rotated log files from the default log locations.
- pattern_rotated = os.path.join(dir, "{0}*{1}".format(name, ext))
- for std_log_file in glob.glob(pattern_rotated):
- if std_log_file not in sg_log_file_paths:
- sg_tasks.append(add_file_task(sourcefile_path=std_log_file))
- sg_log_file_paths[std_log_file] = std_log_file
-
- # Collect archived log files from the default log locations.
- pattern_archived = os.path.join(dir, "{0}*{1}.gz".format(name, ext))
- for std_log_file in glob.glob(pattern_archived):
- if std_log_file not in sg_log_file_paths:
- if should_redact:
- task = add_gzip_file_task(sourcefile_path=std_log_file, salt=salt)
- sg_tasks.append(task)
- else:
- task = add_file_task(sourcefile_path=std_log_file)
- task.no_header = True
- sg_tasks.append(task)
-
- # Lookup each standard SG log files in each standard SG log directories.
- lookup_std_log_files(sg_log_files, os_home_dirs)
-
- # Find log file path from logging.["default"] style config
- # When the log file path from the default style config is a directory, we need to look for all
- # standard SG log files inside the directory including rotated log files. But that handling is
- # not included here, it relies on the fall through to the 2.1 style handling to pick up those
- # log files that were written to the directory path instead; SG exposes the log_file_path as
- # the parent directory of the file specified against LogFilePath through /_config endpoint.
- guessed_logging_path = extract_element_from_default_logging_config('LogFilePath', config_str)
- if guessed_logging_path and os.path.isfile(guessed_logging_path):
- # Get the parent directory and the log file name
- log_file_parent_dir = os.path.abspath(os.path.join(guessed_logging_path, os.pardir))
- log_file_name = os.path.basename(guessed_logging_path)
- name, ext = os.path.splitext(log_file_name)
-
- # Lookup SG log files inside the parent directory, including rotated log files.
- rotated_logs_pattern = os.path.join(log_file_parent_dir, "{0}*{1}".format(name, ext))
- for log_file_item_name in glob.iglob(rotated_logs_pattern):
- log_file_item_path = os.path.join(log_file_parent_dir, log_file_item_name)
- # As long as a task that monitors this log file path has not already been added, add a new task
- if log_file_item_path not in sg_log_file_paths:
- print('Capturing rotated log file {0}'.format(log_file_item_path))
- task = add_file_task(sourcefile_path=log_file_item_path)
- sg_tasks.append(task)
- # Track which log file paths have been added so far.
- sg_log_file_paths[log_file_item_path] = log_file_item_path
-
- # Lookup standard SG log files inside the parent directory.
- lookup_std_log_files(sg_log_files, [log_file_parent_dir])
-
- # Find log file path from SGW 2.1 style logging config
- guessed_log_file_path = extract_element_from_logging_config('log_file_path', config_str)
- if guessed_log_file_path:
- log_file_path = os.path.abspath(guessed_log_file_path)
-
- for log_file_name in sg_log_files:
- # iterate over all log files, including those with a rotation timestamp
- # e.g: sg_info-2018-12-31T13-33-41.055.log
- name, ext = os.path.splitext(log_file_name)
- log_file_pattern = "{0}*{1}".format(name, ext)
- rotated_logs_pattern = os.path.join(log_file_path, log_file_pattern)
-
- for log_file_item_name in glob.iglob(rotated_logs_pattern):
- log_file_item_path = os.path.join(log_file_path, log_file_item_name)
- # As long as a task that monitors this log file path has not already been added, add a new task
- if log_file_item_path not in sg_log_file_paths:
- print('Capturing rotated log file {0}'.format(log_file_item_path))
- task = add_file_task(sourcefile_path=log_file_item_path)
- sg_tasks.append(task)
-
- # Track which log file paths have been added so far
- sg_log_file_paths[log_file_item_path] = log_file_item_path
-
- # try gzipped logs too
- # e.g: sg_info-2018-12-31T13-33-41.055.log.gz
- log_file_pattern = "{0}*{1}.gz".format(name, ext)
- rotated_logs_pattern = os.path.join(log_file_path, log_file_pattern)
-
- for log_file_item_name in glob.iglob(rotated_logs_pattern):
- log_file_item_path = os.path.join(log_file_path, log_file_item_name)
- # As long as a task that monitors this log file path has not already been added, add a new task
- if log_file_item_path not in sg_log_file_paths:
- print('Capturing compressed rotated log file {0}'.format(log_file_item_path))
- # If we're redacting a gzipped log file, we'll need to extract, redact and recompress it.
- # If we're not redacting, we can skip extraction entirely, and use the existing .gz log file.
- if should_redact:
- task = add_gzip_file_task(sourcefile_path=log_file_item_path, salt=salt)
- sg_tasks.append(task)
- else:
- task = add_file_task(sourcefile_path=log_file_item_path)
- task.no_header = True
- sg_tasks.append(task)
-
- # Track which log file paths have been added so far
- sg_log_file_paths[log_file_item_path] = log_file_item_path
-
- return sg_tasks
-
-
-def get_db_list(sg_url, sg_username, sg_password):
-
- # build url to _all_dbs
- all_dbs_url = "{0}/_all_dbs".format(sg_url)
- data = []
-
- # get content and parse into json
- try:
- response = urlopen_with_basic_auth(all_dbs_url, sg_username, sg_password)
- data = json.load(response)
- except urllib.error.URLError as e:
- print("WARNING: Unable to connect to Sync Gateway: {0}".format(e))
-
- # return list of dbs
- return data
-
-# Startup config
-# Commandline args (covered in expvars, IIRC)
-# json file.
-# Running config
-# Server config
-# Each DB config
-def make_config_tasks(zip_dir, sg_config_path, sg_url, sg_username, sg_password, should_redact):
-
- collect_config_tasks = []
-
- # Here are the "usual suspects" to probe for finding the static config
- sg_config_files = [
- "/home/sync_gateway/sync_gateway.json", # linux sync gateway
- "/opt/sync_gateway/etc/sync_gateway.json", # amazon linux AMI sync gateway
- "/Users/sync_gateway/sync_gateway.json" # OSX sync gateway
- R'C:\Program Files (x86)\Couchbase\serviceconfig.json' # Windows (Pre-2.0) sync gateway
- R'C:\Program Files\Couchbase\Sync Gateway\serviceconfig.json' # Windows (Post-2.0) sync gateway
- ]
- sg_config_files = [x for x in sg_config_files if os.path.exists(x)]
-
- # If a config path was discovered from the expvars, or passed in via the user, add that in the
- # list of files to probe
- if sg_config_path is not None:
- sg_config_files.append(sg_config_path)
-
- # Tag user data before redaction, if redact_level is set
- server_config_postprocessors = [password_remover.remove_passwords]
- db_config_postprocessors = [password_remover.remove_passwords]
- if should_redact:
- server_config_postprocessors.append(password_remover.tag_userdata_in_server_config)
- db_config_postprocessors.append(password_remover.tag_userdata_in_db_config)
-
- # Get static server config
- for sg_config_file in sg_config_files:
- task = add_file_task(sourcefile_path=sg_config_file, content_postprocessors=server_config_postprocessors)
- collect_config_tasks.append(task)
-
- # Get server config
- server_config_url = "{0}/_config".format(sg_url)
-
- config_task = make_curl_task(name="Collect server config",
- user=sg_username,
- password=sg_password,
- url=server_config_url,
- log_file="sync_gateway.log",
- content_postprocessors=server_config_postprocessors)
- collect_config_tasks.append(config_task)
-
- # Get server config with runtime defaults and runtime dbconfigs
- server_runtime_config_url = "{0}/_config?include_runtime=true".format(sg_url)
- runtime_config_task = make_curl_task(name="Collect runtime config",
- user=sg_username,
- password=sg_password,
- url=server_runtime_config_url,
- log_file="sync_gateway.log",
- content_postprocessors=server_config_postprocessors)
- collect_config_tasks.append(runtime_config_task)
-
- # Get persisted dbconfigs
- dbs = get_db_list(sg_url, sg_username, sg_password)
- for db in dbs:
- db_config_url = "{0}/{1}/_config".format(sg_url, db)
- db_config_task = make_curl_task(name="Collect {0} database config".format(db),
- user=sg_username,
- password=sg_password,
- url=db_config_url,
- log_file="sync_gateway.log",
- content_postprocessors=db_config_postprocessors)
- collect_config_tasks.append(db_config_task)
-
- return collect_config_tasks
-
-
-def get_config_path_from_cmdline(cmdline_args):
-
- for cmdline_arg in cmdline_args:
- # if it has .json in the path, assume it's a config file.
- # ignore any config files that are URL's for now, since
- # they won't be handled correctly.
- if ".json" in cmdline_arg and "http" not in cmdline_arg:
- return cmdline_arg
- return None
-
-
-def get_paths_from_expvars(sg_url, sg_username, sg_password):
-
- data = None
- sg_binary_path = None
- sg_config_path = None
-
- # get content and parse into json
- if sg_url:
- try:
- response = urlopen_with_basic_auth(expvar_url(sg_url), sg_username, sg_password)
- # response = urllib.request.urlopen(expvar_url(sg_url))
- data = json.load(response)
- except urllib.error.URLError as e:
- print("WARNING: Unable to connect to Sync Gateway: {0}".format(e))
-
- if data is not None and "cmdline" in data:
- cmdline_args = data["cmdline"]
- if len(cmdline_args) == 0:
- return (sg_binary_path, sg_config_path)
- sg_binary_path = cmdline_args[0]
- if len(cmdline_args) > 1:
- try:
- sg_config_path = get_absolute_path(get_config_path_from_cmdline(cmdline_args[1:]))
- except Exception as e:
- print("Exception trying to get absolute sync gateway path from expvars: {0}".format(e))
- sg_config_path = get_config_path_from_cmdline(cmdline_args[1:])
-
- return (sg_binary_path, sg_config_path)
-
-
-def get_absolute_path(relative_path):
- sync_gateway_cwd = ''
- try:
- if _platform.startswith("linux"):
- sync_gateway_pid = subprocess.check_output(['pgrep', 'sync_gateway']).split()[0]
- sync_gateway_cwd = subprocess.check_output(['readlink', '-e', '/proc/{0}/cwd'.format(sync_gateway_pid)]).strip('\n')
- except subprocess.CalledProcessError:
- pass
-
- return os.path.join(sync_gateway_cwd, relative_path)
-
-
-def make_download_expvars_task(sg_url, sg_username, sg_password):
-
- task = make_curl_task(
- name="download_sg_expvars",
- user=sg_username,
- password=sg_password,
- url=expvar_url(sg_url),
- log_file="expvars.json"
- )
-
- task.no_header = True
-
- return task
-
-
-def make_sg_tasks(zip_dir, sg_url, sg_username, sg_password, sync_gateway_config_path_option, sync_gateway_executable_path, should_redact, salt):
-
- # Get path to sg binary (reliable) and config (not reliable)
- sg_binary_path, sg_config_path = get_paths_from_expvars(sg_url, sg_username, sg_password)
- print("Discovered from expvars: sg_binary_path={0} sg_config_path={1}".format(sg_binary_path, sg_config_path))
-
- # If user passed in a specific path to the SG binary, then use it
- if sync_gateway_executable_path is not None and len(sync_gateway_executable_path) > 0:
- if not os.path.exists(sync_gateway_executable_path):
- raise Exception("Path to sync gateway executable passed in does not exist: {0}".format(sync_gateway_executable_path))
- sg_binary_path = sync_gateway_executable_path
-
- if sg_config_path is None and sync_gateway_config_path_option is not None and len(sync_gateway_config_path_option) > 0:
- sg_config_path = sync_gateway_config_path_option
-
- # Collect logs
- collect_logs_tasks = make_collect_logs_tasks(zip_dir, sg_url, sg_config_path, sg_username, sg_password, salt, should_redact)
-
- py_expvar_task = make_download_expvars_task(sg_url, sg_username, sg_password)
-
- # If the user passed in a valid config path, then use that rather than what's in the expvars
- if sync_gateway_config_path_option is not None and len(sync_gateway_config_path_option) > 0 and os.path.exists(sync_gateway_config_path_option):
- sg_config_path = sync_gateway_config_path_option
-
- http_client_pprof_tasks = make_http_client_pprof_tasks(sg_url, sg_username, sg_password)
-
- # Add a task to collect Sync Gateway config
- config_tasks = make_config_tasks(zip_dir, sg_config_path, sg_url, sg_username, sg_password, should_redact)
-
- # Curl the /_status
- status_tasks = make_curl_task(name="Collect server status",
- user=sg_username,
- password=sg_password,
- url="{0}/_status".format(sg_url),
- log_file="sync_gateway.log",
- content_postprocessors=[password_remover.pretty_print_json])
-
- # Combine all tasks into flattened list
- sg_tasks = flatten(
- [
- collect_logs_tasks,
- py_expvar_task,
- http_client_pprof_tasks,
- config_tasks,
- status_tasks,
- ]
- )
-
- return sg_tasks
-
-
-def discover_sg_binary_path(options, sg_url, sg_username, sg_password):
-
- sg_bin_dirs = [
- "/opt/couchbase-sync-gateway/bin/sync_gateway", # Linux + OSX
- R'C:\Program Files (x86)\Couchbase\sync_gateway.exe', # Windows (Pre-2.0)
- R'C:\Program Files\Couchbase\Sync Gateway\sync_gateway.exe', # Windows (Post-2.0)
- ]
-
- for sg_binary_path_candidate in sg_bin_dirs:
- if os.path.exists(sg_binary_path_candidate):
- return sg_binary_path_candidate
-
- sg_binary_path, _ = get_paths_from_expvars(sg_url, sg_username, sg_password)
-
- if options.sync_gateway_executable is not None and len(options.sync_gateway_executable) > 0:
- if not os.path.exists(options.sync_gateway_executable):
- raise Exception(
- "Path to sync gateway executable passed in does not exist: {0}".format(options.sync_gateway_executable))
- return sg_binary_path
-
- # fallback to whatever was specified in options
- return options.sync_gateway_executable
-
-
-def main():
-
- # ask all tools to use C locale (MB-12050)
- os.environ['LANG'] = 'C'
- os.environ['LC_ALL'] = 'C'
-
- # Workaround MB-8239: erl script fails in OSX as it is unable to find COUCHBASE_TOP
- if platform.system() == 'Darwin':
- os.environ["COUCHBASE_TOP"] = os.path.abspath(os.path.join(mydir, ".."))
-
- # Parse command line options
- parser = create_option_parser()
- options, args = parser.parse_args()
-
- # Validate args
- if len(args) != 1:
- parser.error("incorrect number of arguments. Expecting filename to collect diagnostics into")
-
- # Setup stdin watcher if this option was passed
- if options.watch_stdin:
- setup_stdin_watcher()
-
- sg_url = options.sync_gateway_url
- sg_username = options.sync_gateway_username
- sg_password = options.sync_gateway_password
-
- if not sg_url or "://" not in sg_url:
- if not sg_url:
- root_url = "127.0.0.1:4985"
- else:
- root_url = sg_url
- sg_url_http = "http://" + root_url
- print("Trying Sync Gateway URL: {0}".format(sg_url_http))
-
- # Set sg_url to sg_url_http at this point
- # If we're unable to determine which URL to use this is our best
- # attempt. Avoids having this is 'None' later
- sg_url = sg_url_http
-
- try:
- response = urlopen_with_basic_auth(sg_url_http, sg_username, sg_password)
- json.load(response)
- except Exception as e:
- print("Failed to communicate with: {} {}".format(sg_url_http, e))
- sg_url_https = "https://" + root_url
- print("Trying Sync Gateway URL: {0}".format(sg_url_https))
- try:
- response = urlopen_with_basic_auth(sg_url_https, sg_username, sg_password)
- json.load(response)
- except Exception as e:
- print("Failed to communicate with Sync Gateway using url {}. "
- "Check that Sync Gateway is running and reachable. "
- "Will attempt to continue anyway.".format(e))
- else:
- sg_url = sg_url_https
-
- # Build path to zip directory, make sure it exists
- zip_filename = args[0]
- if zip_filename[-4:] != '.zip':
- zip_filename = zip_filename + '.zip'
- zip_dir = os.path.dirname(os.path.abspath(zip_filename))
- if not os.access(zip_dir, os.W_OK | os.X_OK):
- print("do not have write access to the directory %s" % (zip_dir))
- sys.exit(1)
-
- if options.redact_level != "none" and options.redact_level != "partial":
- parser.error("Invalid redaction level. Only 'none' and 'partial' are supported.")
-
- upload_url = ""
- should_redact = False
- if options.redact_level != "none":
- should_redact = True
-
-        # Generate the S3 URL where the zip file will be uploaded
- redact_zip_file = zip_filename[:-4] + "-redacted" + zip_filename[-4:]
- upload_url = generate_upload_url(parser, options, redact_zip_file)
- else:
- upload_url = generate_upload_url(parser, options, zip_filename)
-
-    # POSIX (Linux and macOS)
- if os.name == 'posix':
-
- path = [
- mydir,
- '/opt/couchbase/bin',
- os.environ['PATH'],
- '/bin',
- '/sbin',
- '/usr/bin',
- '/usr/sbin'
- ]
- os.environ['PATH'] = ':'.join(path)
-
- library_path = [
- os.path.join(options.root, 'lib')
- ]
-
- current_library_path = os.environ.get('LD_LIBRARY_PATH')
- if current_library_path is not None:
- library_path.append(current_library_path)
-
- os.environ['LD_LIBRARY_PATH'] = ':'.join(library_path)
-
- # Windows
- elif os.name == 'nt':
-
- path = [
- mydir,
- os.environ['PATH']
- ]
- os.environ['PATH'] = ';'.join(path)
-
- # If user asked to just upload, then upload and exit
- if options.just_upload_into is not None:
- do_upload_and_exit(args[0], options.just_upload_into, options.upload_proxy)
-
-    # Create a TaskRunner and run all of the OS tasks (collect top, netstat, etc).
-    # The output of the tasks will go directly into sync_gateway.log
- runner = TaskRunner(verbosity=options.verbosity,
- default_name="sync_gateway.log",
- tmp_dir=options.tmp_dir)
-
- if not options.product_only:
- for task in make_os_tasks(["sync_gateway"]):
- runner.run(task)
-
- # Output the Python version if verbosity was enabled
- if options.verbosity:
- log("Python version: %s" % sys.version)
-
- # Find path to sg binary
- sg_binary_path = discover_sg_binary_path(options, sg_url, sg_username, sg_password)
-
- # Run SG specific tasks
- for task in make_sg_tasks(zip_dir, sg_url, sg_username, sg_password, options.sync_gateway_config, options.sync_gateway_executable, should_redact, options.salt_value):
- runner.run(task)
-
- if sg_binary_path is not None and sg_binary_path != "" and os.path.exists(sg_binary_path):
- runner.collect_file(sg_binary_path)
- else:
- print("WARNING: unable to find Sync Gateway executable, omitting from result. Go pprofs will not be accurate.")
-
- # Echo the command line args used to run sgcollect_info
- cmd_line_args_task = AllOsTask(
- "Echo sgcollect_info cmd line args",
- "echo options: {0} args: {1}".format({k: ud(v, should_redact) for k, v in list(options.__dict__.items())}, args),
- log_file="sgcollect_info_options.log",
- )
- runner.run(cmd_line_args_task)
-
- runner.close_all_files()
-
- # Build redacted zip file
- if options.redact_level != "none":
- log("Redacting log files to level: %s" % options.redact_level)
- runner.redact_and_zip(redact_zip_file, 'sgcollect_info', options.salt_value, platform.node())
-
- # Build the actual zip file
- runner.zip(zip_filename, 'sgcollect_info', platform.node())
-
- # Upload the zip to the URL to S3 if required
- if upload_url:
- if options.redact_level != "none":
- do_upload_and_exit(redact_zip_file, upload_url, options.upload_proxy)
- else:
- do_upload_and_exit(zip_filename, upload_url, options.upload_proxy)
-
- if options.redact_level != "none":
- print("Zipfile built: {0}".format(redact_zip_file))
-
- print("Zipfile built: {0}".format(zip_filename))
-
-
-def ud(value, should_redact=True):
-    if not should_redact:
-        return value
-    return "<ud>{0}</ud>".format(value)
-
-
-if __name__ == '__main__':
- main()
diff --git a/tools/tasks.py b/tools/tasks.py
deleted file mode 100644
index 0ef9c05e2a..0000000000
--- a/tools/tasks.py
+++ /dev/null
@@ -1,1162 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Copyright 2016-Present Couchbase, Inc.
-
-Use of this software is governed by the Business Source License included in
-the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
-file, in accordance with the Business Source License, use of this software will
-be governed by the Apache License, Version 2.0, included in the file
-licenses/APL2.txt.
-"""
-
-# -*- python -*-
-import atexit
-import base64
-import glob
-import gzip
-import hashlib
-import io
-import mmap
-import optparse
-import os
-import re
-import shutil
-import signal
-import socket
-import subprocess
-import sys
-import tempfile
-import threading
-import time
-import traceback
-import urllib.error
-import urllib.parse
-import urllib.request
-
-# The 'latin-1' encoding is being used since we can't guarantee that all bytes that will be
-# processed through sgcollect will be decodable from 'utf-8' (which is the default in Python)
-# and the decoder may fail if it encounters any such byte sequence whilst decoding byte strings.
-# The 'latin-1' encoding belongs to the ISO-8859 family and is capable of decoding any byte sequence.
-ENCODING_LATIN1 = 'latin-1'
-
-# The 'backslashreplace' error handler covers edge cases on Windows, where the
-# cp1252 code page is sometimes labelled 'latin-1' even though it does not map
-# all possible byte values.
-
-class LogRedactor:
- def __init__(self, salt, tmpdir):
- self.target_dir = os.path.join(tmpdir, "redacted")
- os.makedirs(self.target_dir)
-
- self.couchbase_log = CouchbaseLogProcessor(salt)
- self.regular_log = RegularLogProcessor(salt)
-
- def _process_file(self, ifile, ofile, processor):
- try:
- with open(ifile, 'r', newline='', encoding=ENCODING_LATIN1, errors=BACKSLASH_REPLACE) as inp:
- with open(ofile, 'w+', newline='', encoding=ENCODING_LATIN1, errors=BACKSLASH_REPLACE) as out:
- # Write redaction header
- out.write(self.couchbase_log.do("RedactLevel"))
- for line in inp:
- out.write(processor.do(line))
- except IOError as e:
- log("I/O error(%s): %s" % (e.errno, e.strerror))
-
- def redact_file(self, name, ifile):
- _, filename = os.path.split(name)
- ofile = os.path.join(self.target_dir, filename)
- self._process_file(ifile, ofile, self.regular_log)
- return ofile
-
- def redact_string(self, istring):
- ostring = self.couchbase_log.do("RedactLevel")
- ostring += self.regular_log.do(istring)
- return ostring
-
-
-class CouchbaseLogProcessor:
- def __init__(self, salt):
- self.salt = salt
-
- def do(self, line):
- if "RedactLevel" in line:
- # salt + salt to maintain consistency with other
- # occurrences of hashed salt in the logs.
- return 'RedactLevel:partial,HashOfSalt:%s\n' \
- % generate_hash(self.salt + self.salt).hexdigest()
- else:
- return line
-
-
-class RegularLogProcessor:
-    rexes = [re.compile('(<ud>)(.+?)(</ud>)'),
- # Redact the rest of the line in the case we encounter
- # log-redaction-salt. Needed to redact ps output containing sgcollect flags safely.
- re.compile('(log-redaction-salt)(.+)')]
-
- def __init__(self, salt):
- self.salt = salt
-
- def _hash(self, match):
- result = match.group(1)
- if match.lastindex == 3:
- h = generate_hash(self.salt + match.group(2)).hexdigest()
- result += h + match.group(3)
- elif match.lastindex == 2:
- result += " "
- return result
-
- def do(self, line):
- for rex in self.rexes:
- line = rex.sub(self._hash, line)
- return line
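-
-# Example (illustrative, values assumed): given salt "s",
-#   RegularLogProcessor("s").do("user <ud>alice</ud>\n")
-# replaces "alice" with its salted SHA-1 hex digest:
-#   "user <ud><40 hex chars></ud>\n";
-# a "log-redaction-salt <value>" line has everything after the flag blanked.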
-
-
-def generate_hash(val):
- return hashlib.sha1(val.encode())
-
-
-class AltExitC(object):
- def __init__(self):
- self.list = []
- self.lock = threading.Lock()
- atexit.register(self.at_exit_handler)
-
- def register(self, f):
- self.lock.acquire()
- self.register_and_unlock(f)
-
- def register_and_unlock(self, f):
- try:
- self.list.append(f)
- finally:
- self.lock.release()
-
- def at_exit_handler(self):
- self.lock.acquire()
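-        # Run handlers in reverse registration order (LIFO), mirroring
-        # atexit semantics, so later registrations are cleaned up first.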
- self.list.reverse()
- for f in self.list:
- try:
- f()
- except Exception:
- pass
-
- def exit(self, status):
- self.at_exit_handler()
- os._exit(status)
-
-
-AltExit = AltExitC()
-
-
-def log(message, end='\n'):
- sys.stderr.write(message + end)
- sys.stderr.flush()
-
-
-class Task(object):
- privileged = False
- no_header = False
- num_samples = 1
- interval = 0
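-    # Class-level defaults, overridable per task via **kwargs: privileged
-    # tasks are skipped unless running as root; no_header suppresses the
-    # "====" banner in the log file; num_samples/interval re-run the command
-    # periodically (e.g. for iostat-style sampling).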
-
- def __init__(self, description, command, timeout=None, **kwargs):
- self.description = description
- self.command = command
- self.timeout = timeout
- self.__dict__.update(kwargs)
-
-    def execute(self, fp):
-        """Run the task"""
-        use_shell = not isinstance(self.command, list)
-        if "literal" in self.__dict__:
-            # fp is opened in binary mode, so encode before writing
-            fp.write(("%s\n" % self.literal).encode())
-            return 0
-
- env = None
- if "addenv" in self.__dict__:
- env = os.environ.copy()
- env.update(self.addenv)
- try:
- p = subprocess.Popen(self.command, bufsize=-1,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT,
- shell=use_shell, env=env)
-        except OSError as e:
-            # If use_shell is False then Popen may raise an exception when the
-            # binary is missing. In that case we mimic what a shell does:
-            # complain (into the log file) and return a non-zero status code.
-            # It might also automatically handle things like "failed to fork
-            # due to some system limit".
-            fp.write(("Failed to execute %s: %s\n" % (self.command, e)).encode())
-            return 127
- p.stdin.close()
-
- from threading import Timer, Event
-
- timer = None
- timer_fired = Event()
-
- if self.timeout is not None and hasattr(p, 'kill'):
- def on_timeout():
- p.kill()
- timer_fired.set()
-
- timer = Timer(self.timeout, on_timeout)
- timer.start()
-
- try:
- while True:
- data = p.stdout.read(64 * 1024)
- if not data:
- break
-
- fp.write(data)
- finally:
- if timer is not None:
- timer.cancel()
- timer.join()
-
-            # There's a tiny chance that the command succeeds just before the
-            # timer fires; that would result in a spurious timeout message.
-            if timer_fired.is_set():
-                fp.write(("`%s` timed out after %s seconds\n" % (self.command, self.timeout)).encode())
-
- return p.wait()
-
- def will_run(self):
- """Determine if this task will run on this platform."""
- return sys.platform.startswith(tuple(self.platforms))
-
-
-class PythonTask(object):
- """
- A task that takes a python function as an argument rather than an OS command.
- These will run on any platform.
- """
- privileged = False
- no_header = False
- num_samples = 1
- interval = 0
-
- def __init__(self, description, callable, timeout=None, **kwargs):
- self.description = description
- self.callable = callable
- self.command = "pythontask"
- self.timeout = timeout
- self.log_exception = False # default to false, may be overridden by val in **kwargs
- self.__dict__.update(kwargs)
-
- def execute(self, fp):
- """Run the task"""
- print("log_file: {0}. ".format(self.log_file))
- try:
- result = self.callable()
- try:
- fp.write(result.encode())
- except (UnicodeEncodeError, AttributeError):
- fp.write(result)
- return 0
- except Exception as e:
- if self.log_exception:
- print("Exception executing python task: {0}".format(e))
- return 1
-
- def will_run(self):
- """Determine if this task will run on this platform."""
- return True
-
-
-class TaskRunner(object):
-
- def __init__(self, verbosity=0, default_name="couchbase.log",
- tmp_dir=None):
- self.files = {}
- self.tasks = {}
- self.verbosity = verbosity
- self.start_time = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
- self.default_name = default_name
-
- if not tmp_dir:
- tmp_dir = None
- else:
- tmp_dir = os.path.abspath(os.path.expanduser(tmp_dir))
-
- try:
- self.tmpdir = tempfile.mkdtemp(dir=tmp_dir)
- except OSError as e:
- print("Could not use temporary dir {0}: {1}".format(tmp_dir, e))
- sys.exit(1)
-
- # If a dir wasn't passed by --tmp-dir, check if the env var was set and if we were able to use it
- if not tmp_dir and os.getenv("TMPDIR") and os.path.split(self.tmpdir)[0] != os.getenv("TMPDIR"):
- log("Could not use TMPDIR {0}".format(os.getenv("TMPDIR")))
- log("Using temporary dir {0}".format(os.path.split(self.tmpdir)[0]))
-
- AltExit.register(self.finalize)
-
-    def finalize(self):
-        try:
-            for fp in self.files.values():
-                fp.close()
-        except Exception:
-            pass
-
- shutil.rmtree(self.tmpdir, ignore_errors=True)
-
- def collect_file(self, filename):
- """Add a file to the list of files collected. Used to capture the exact
- file (including timestamps) from the Couchbase instance.
- filename - Absolute path to file to collect.
- """
- if filename not in self.files:
- self.files[filename] = open(filename, 'r')
- else:
- log("Unable to collect file '{0}' - already collected.".format(
- filename))
-
- def get_file(self, filename):
- if filename in self.files:
- fp = self.files[filename]
- else:
- fp = open(os.path.join(self.tmpdir, filename), 'wb+')
- self.files[filename] = fp
-
- return fp
-
- def header(self, fp, title, subtitle):
- separator = '=' * 78
- message = f"{separator}\n{title}\n{subtitle}\n{separator}\n"
- fp.write(message.encode())
-
- def log_result(self, result):
- if result == 0:
- log("OK")
- else:
- log("Exit code %d" % result)
-
- def run(self, task):
- """Run a task with a file descriptor corresponding to its log file"""
- if task.will_run():
- if hasattr(task, 'command_to_print'):
- command_to_print = task.command_to_print
- else:
- command_to_print = task.command
-
- log("%s (%s) - " % (task.description, command_to_print), end='')
- if task.privileged and os.getuid() != 0:
- log("skipped (needs root privs)")
- return
-
- if hasattr(task, 'log_file'):
- filename = task.log_file
- else:
- filename = self.default_name
-
- fp = self.get_file(filename)
- if not task.no_header:
- self.header(fp, task.description, command_to_print)
-
- for i in range(task.num_samples):
- if i > 0:
- log("Taking sample %d after %f seconds - " % (i+1, task.interval), end='')
- time.sleep(task.interval)
- result = task.execute(fp)
- self.log_result(result)
- fp.flush()
-
-        elif self.verbosity >= 2:
-            log('Skipping "%s" (%s): not for platform %s' % (task.description, getattr(task, 'command_to_print', task.command), sys.platform))
-
- def redact_and_zip(self, filename, log_type, salt, node):
- files = []
- redactor = LogRedactor(salt, self.tmpdir)
-
- for name, fp in self.files.items():
- if not (".gz" in name or
- "expvars.json" in name or
- os.path.basename(name) == "sync_gateway"):
- files.append(redactor.redact_file(name, fp.name))
- else:
- files.append(fp.name)
-
- prefix = f"{log_type}_{node}_{self.start_time}"
- self.__make_zip(prefix, filename, files)
-
- def zip(self, filename, log_type, node):
- files = [file.name for name, file in self.files.items()]
- prefix = f"{log_type}_{node}_{self.start_time}"
- self.__make_zip(prefix, filename, files)
-
- def close_all_files(self):
- for name, fp in self.files.items():
- fp.close()
-
- @staticmethod
- def __make_zip(prefix, filename, files):
- """Write all our logs to a zipfile"""
-
- from zipfile import ZipFile, ZIP_DEFLATED
- zf = ZipFile(filename, mode='w', compression=ZIP_DEFLATED)
- try:
- for name in files:
- zf.write(name, f"{prefix}/{os.path.basename(name)}")
- finally:
- zf.close()
-
-
-class SolarisTask(Task):
- platforms = ['sunos5', 'solaris']
-
-
-class LinuxTask(Task):
- platforms = ['linux']
-
-
-class WindowsTask(Task):
- platforms = ['win32', 'cygwin']
-
-
-class MacOSXTask(Task):
- platforms = ['darwin']
-
-
-class UnixTask(SolarisTask, LinuxTask, MacOSXTask):
- platforms = SolarisTask.platforms + LinuxTask.platforms + MacOSXTask.platforms
-
-
-class AllOsTask(UnixTask, WindowsTask):
- platforms = UnixTask.platforms + WindowsTask.platforms
-
-
-def make_curl_task(name, url, user="", password="", content_postprocessors=[],
- timeout=60, log_file="python_curl.log",
- **kwargs):
- """
- NOTE: this used to use curl but was later reworked to use pure python
- in order to be more cross platform, since Windows doesn't ship with curl
-
- The content_postprocessors is a list of functions that:
-
- - Are given a string as their only parameter
- - Return a string as their only return value
-
- For example:
-
- def reverser(s):
- return s[::-1] # reverse string
-
- They are run in order. This allows for stripping out passwords and other
- sensitive info
-
- """
- def python_curl_task():
- r = urllib.request.Request(url=url)
-        if user and len(user) > 0:
-            base64string = base64.b64encode(bytes('%s:%s' % (user, password), 'utf-8'))
-            r.add_header("Authorization", "Basic %s" % base64string.decode('utf-8'))
-        try:
-            response_file_handle = urllib.request.urlopen(r, timeout=timeout)
-        except urllib.error.URLError as e:
-            print("WARNING: Error connecting to url {0}: {1}".format(url, e))
-            # Bail out with an empty payload rather than reading from an
-            # unbound response handle below.
-            return ""
-
-        response_string = response_file_handle.read()
-        for content_postprocessor in content_postprocessors:
-            response_string = content_postprocessor(response_string)
-        return response_string
-
- return PythonTask(
- description=name,
- callable=python_curl_task,
- log_file=log_file,
- **kwargs
- )
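-
-# A hedged usage sketch: the credentials are placeholders, and
-# password_remover.pretty_print_json is the postprocessor used elsewhere in
-# sgcollect_info for JSON endpoints.
-#
-#   task = make_curl_task(name="Collect server status",
-#                         user="admin", password="pass",
-#                         url="http://127.0.0.1:4985/_status",
-#                         content_postprocessors=[password_remover.pretty_print_json])
-#   runner.run(task)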
-
-
-def add_gzip_file_task(sourcefile_path, salt, content_postprocessors=[]):
- """
- Adds the extracted contents of a file to the output zip
-
- The content_postprocessors is a list of functions -- see make_curl_task
- """
- def python_add_file_task():
- with gzip.open(sourcefile_path, 'r') as infile:
- contents = infile.read().decode('utf-8')
- for content_postprocessor in content_postprocessors:
- contents = content_postprocessor(contents)
- redactor = LogRedactor(salt, tempfile.mkdtemp())
- contents = redactor.redact_string(contents)
-
- out = io.BytesIO()
- with gzip.GzipFile(fileobj=out, mode="w") as f:
- f.write(contents.encode())
- return out.getvalue()
-
- log_file = os.path.basename(sourcefile_path)
-
- task = PythonTask(
- description="Extracted contents of {0}".format(sourcefile_path),
- callable=python_add_file_task,
- log_file=log_file,
- log_exception=False,
- )
-
- task.no_header = True
-
- return task
-
-
-def add_file_task(sourcefile_path, content_postprocessors=[]):
- """
- Adds the contents of a file to the output zip
-
- The content_postprocessors is a list of functions -- see make_curl_task
- """
- def python_add_file_task():
-        with open(sourcefile_path, 'rb') as infile:
- contents = infile.read()
- for content_postprocessor in content_postprocessors:
- contents = content_postprocessor(contents)
- return contents
-
- task = PythonTask(
- description="Contents of {0}".format(sourcefile_path),
- callable=python_add_file_task,
- log_file=os.path.basename(sourcefile_path),
- log_exception=False,
- )
-
- return task
-
-
-def make_query_task(statement, user, password, port):
- url = "http://127.0.0.1:%s/query/service?statement=%s" % (port, urllib.parse.quote(statement))
-
- return make_curl_task(name="Result of query statement \'%s\'" % statement,
- user=user, password=password, url=url)
-
-
-def basedir():
- mydir = os.path.dirname(sys.argv[0])
- if mydir == "":
- mydir = "."
- return mydir
-
-
-def make_event_log_task():
- from datetime import datetime, timedelta
-
-    # I found that wmic ntevent can be extremely slow, so limit the output
-    # to approximately the last month
- limit = datetime.today() - timedelta(days=31)
- limit = limit.strftime('%Y%m%d000000.000000-000')
-
- return WindowsTask("Event log",
- "wmic ntevent where "
- "\""
- "(LogFile='application' or LogFile='system') and "
- "EventType<3 and TimeGenerated>'%(limit)s'"
- "\" "
- "get TimeGenerated,LogFile,SourceName,EventType,Message "
- "/FORMAT:list" % locals())
-
-
-def make_event_log_task_sg_info():
- from datetime import datetime, timedelta
-
-    # I found that wmic ntevent can be extremely slow, so limit the output
-    # to approximately the last month
- limit = datetime.today() - timedelta(days=31)
- limit = limit.strftime('%Y%m%d000000.000000-000')
-
- return WindowsTask("SG Event log",
- "wmic ntevent where "
- "\""
- "SourceName='SyncGateway' and "
- "TimeGenerated>'%(limit)s'"
- "\" "
- "get TimeGenerated,LogFile,SourceName,EventType,Message "
- "/FORMAT:list" % locals())
-
-
-def make_os_tasks(processes):
- programs = " ".join(processes)
-
- _tasks = [
- UnixTask("uname", "uname -a"),
- UnixTask("time and TZ", "date; date -u"),
- UnixTask("ntp time",
- "ntpdate -q pool.ntp.org || "
- "nc time.nist.gov 13 || "
- "netcat time.nist.gov 13", timeout=60),
- UnixTask("ntp peers", "ntpq -p"),
- UnixTask("raw /etc/sysconfig/clock", "cat /etc/sysconfig/clock"),
- UnixTask("raw /etc/timezone", "cat /etc/timezone"),
- WindowsTask("System information", "systeminfo"),
- WindowsTask("Computer system", "wmic computersystem"),
- WindowsTask("Computer OS", "wmic os"),
- LinuxTask("System Hardware", "lshw -json || lshw"),
- SolarisTask("Process list snapshot", "prstat -a -c -n 100 -t -v -L 1 10"),
- SolarisTask("Process list", "ps -ef"),
- SolarisTask("Service configuration", "svcs -a"),
- SolarisTask("Swap configuration", "swap -l"),
- SolarisTask("Disk activity", "zpool iostat 1 10"),
- SolarisTask("Disk activity", "iostat -E 1 10"),
- LinuxTask("Process list snapshot", "export TERM=''; top -Hb -n1 || top -H n1"),
- LinuxTask("Process list", "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,maj_flt,min_flt,pri,nice,vsize,rss,tty,stat,wchan:12,start,bsdtime,command"),
- LinuxTask("Raw /proc/vmstat", "cat /proc/vmstat"),
- LinuxTask("Raw /proc/mounts", "cat /proc/mounts"),
- LinuxTask("Raw /proc/partitions", "cat /proc/partitions"),
- LinuxTask("Raw /proc/diskstats", "cat /proc/diskstats; echo ''", num_samples=10, interval=1),
- LinuxTask("Raw /proc/interrupts", "cat /proc/interrupts"),
- LinuxTask("Swap configuration", "free -t"),
- LinuxTask("Swap configuration", "swapon -s"),
- LinuxTask("Kernel modules", "lsmod"),
- LinuxTask("Distro version", "cat /etc/redhat-release"),
- LinuxTask("Distro version", "lsb_release -a"),
- LinuxTask("Distro version", "cat /etc/SuSE-release"),
- LinuxTask("Distro version", "cat /etc/issue"),
- LinuxTask("Installed software", "rpm -qa"),
-        # NOTE: AFAIK columns _was_ necessary, but it doesn't appear to be
-        # required anymore. I.e. dpkg -l correctly detects stdout as not a
-        # tty and stops playing smart on formatting. Let's keep it for a few
-        # years and then drop it, however.
- LinuxTask("Extended iostat", "iostat -x -p ALL 1 10 || iostat -x 1 10"),
- LinuxTask("Core dump settings", "find /proc/sys/kernel -type f -name '*core*' -print -exec cat '{}' ';'"),
- UnixTask("sysctl settings", "sysctl -a"),
- LinuxTask("Relevant lsof output",
- "echo %(programs)s | xargs -n1 pgrep | xargs -n1 -r -- lsof -n -p" % locals()),
- LinuxTask("LVM info", "lvdisplay"),
- LinuxTask("LVM info", "vgdisplay"),
- LinuxTask("LVM info", "pvdisplay"),
- MacOSXTask("Process list snapshot", "top -l 1"),
- MacOSXTask("Disk activity", "iostat 1 10"),
- MacOSXTask("Process list",
- "ps -Aww -o user,pid,lwp,ppid,nlwp,pcpu,pri,nice,vsize,rss,tty,"
- "stat,wchan:12,start,bsdtime,command"),
- WindowsTask("Installed software", "wmic product get name, version"),
- WindowsTask("Service list", "wmic service where state=\"running\" GET caption, name, state"),
- WindowsTask("Process list", "wmic process"),
- WindowsTask("Process usage", "tasklist /V /fo list"),
- WindowsTask("Swap settings", "wmic pagefile"),
- WindowsTask("Disk partition", "wmic partition"),
- WindowsTask("Disk volumes", "wmic volume"),
- UnixTask("Network configuration", "ifconfig -a", interval=10,
- num_samples=2),
- LinuxTask("Network configuration", "echo link addr neigh rule route netns | xargs -n1 -- sh -x -c 'ip $1 list' --"),
- WindowsTask("Network configuration", "ipconfig /all", interval=10,
- num_samples=2),
- LinuxTask("Raw /proc/net/dev", "cat /proc/net/dev"),
- LinuxTask("Network link statistics", "ip -s link"),
- UnixTask("Network status", "netstat -anp || netstat -an"),
- WindowsTask("Network status", "netstat -ano"),
- AllOsTask("Network routing table", "netstat -rn"),
- LinuxTask("Network socket statistics", "ss -an"),
- LinuxTask("Extended socket statistics", "ss -an --info --processes"),
- UnixTask("Arp cache", "arp -na"),
- LinuxTask("Iptables dump", "iptables-save"),
- UnixTask("Raw /etc/hosts", "cat /etc/hosts"),
- UnixTask("Raw /etc/resolv.conf", "cat /etc/resolv.conf"),
- UnixTask("Raw /etc/nsswitch.conf", "cat /etc/nsswitch.conf"),
- WindowsTask("Arp cache", "arp -a"),
- WindowsTask("Network Interface Controller", "wmic nic"),
- WindowsTask("Network Adapter", "wmic nicconfig"),
- WindowsTask("Active network connection", "wmic netuse"),
- WindowsTask("Protocols", "wmic netprotocol"),
- WindowsTask("Hosts file", "type %SystemRoot%\system32\drivers\etc\hosts"),
- WindowsTask("Cache memory", "wmic memcache"),
- WindowsTask("Physical memory", "wmic memphysical"),
- WindowsTask("Physical memory chip info", "wmic memorychip"),
- WindowsTask("Local storage devices", "wmic logicaldisk"),
- UnixTask("Filesystem", "df -ha"),
- UnixTask("System activity reporter", "sar 1 10"),
- UnixTask("System paging activity", "vmstat 1 10"),
- UnixTask("System uptime", "uptime"),
- UnixTask("couchbase user definition", "getent passwd couchbase"),
- UnixTask("couchbase user limits", "su couchbase -c \"ulimit -a\"",
- privileged=True),
- UnixTask("sync_gateway user definition", "getent passwd sync_gateway"),
- UnixTask("sync_gateway user limits", "su sync_gateway -c \"ulimit -a\"",
- privileged=True),
- UnixTask("Interrupt status", "intrstat 1 10"),
- UnixTask("Processor status", "mpstat 1 10"),
- UnixTask("System log", "cat /var/adm/messages"),
- LinuxTask("Raw /proc/uptime", "cat /proc/uptime"),
- LinuxTask("Systemd journal", "journalctl 2>&1 | gzip -c",
- log_file="systemd_journal.gz", no_header=True),
- LinuxTask("All logs", "tar cz /var/log/syslog* /var/log/dmesg /var/log/messages* /var/log/daemon* /var/log/debug* /var/log/kern.log* 2>/dev/null",
- log_file="syslog.tar.gz", no_header=True),
- LinuxTask("Relevant proc data", "echo %(programs)s | "
- "xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; cat /proc/$1/status; cat /proc/$1/limits; cat /proc/$1/smaps; cat /proc/$1/numa_maps; cat /proc/$1/task/*/sched; echo' --" % locals()),
- LinuxTask("Processes' environment", "echo %(programs)s | "
- r"xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; ( cat /proc/$1/environ | tr \\0 \\n ); echo' --" % locals()),
- LinuxTask("NUMA data", "numactl --hardware"),
- LinuxTask("NUMA data", "numactl --show"),
- LinuxTask("NUMA data", "cat /sys/devices/system/node/node*/numastat"),
- UnixTask("Kernel log buffer", "dmesg -H || dmesg"),
- LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/enabled"),
- LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/defrag"),
- LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/enabled"),
- LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/defrag"),
- LinuxTask("Network statistics", "netstat -s"),
- LinuxTask("Full raw netstat", "cat /proc/net/netstat"),
- LinuxTask("CPU throttling info", "echo /sys/devices/system/cpu/cpu*/thermal_throttle/* | xargs -n1 -- sh -c 'echo $1; cat $1' --"),
- make_event_log_task(),
- make_event_log_task_sg_info(),
- ]
-
- return _tasks
-
-
-# stolen from http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
-def iter_flatten(iterable):
- it = iter(iterable)
- for e in it:
- if isinstance(e, (list, tuple)):
- for f in iter_flatten(e):
- yield f
- else:
- yield e
-
-
-def flatten(iterable):
- return [e for e in iter_flatten(iterable)]
-
-
-def read_guts(guts, key):
- return guts.get(key, "")
-
-
-def winquote_path(s):
- return '"'+s.replace("\\\\", "\\").replace('/', "\\")+'"'
-
-
-# python's split splits empty string to [''] which doesn't make any
-# sense. So this function works around that.
-def correct_split(string, splitchar):
- rv = string.split(splitchar)
- if rv == ['']:
- rv = []
- return rv
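-
-# Examples (illustrative):
-#   correct_split("", ",")     -> []
-#   correct_split("a,b", ",")  -> ["a", "b"]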
-
-
-def make_stats_archives_task(guts, initargs_path):
- escript = exec_name("escript")
- escript_wrapper = find_script("escript-wrapper")
- dump_stats = find_script("dump-stats")
- stats_dir = read_guts(guts, "stats_dir")
-
- if dump_stats is None or escript_wrapper is None or not stats_dir:
- return []
-
- return AllOsTask("stats archives",
- [escript,
- escript_wrapper,
- "--initargs-path", initargs_path, "--",
- dump_stats, stats_dir],
- no_header=True,
- log_file="stats_archives.json")
-
-
-def make_product_task(guts, initargs_path, options):
- root = os.path.abspath(os.path.join(initargs_path, "..", "..", "..", ".."))
- dbdir = read_guts(guts, "db_dir")
- viewdir = read_guts(guts, "idx_dir")
-
- diag_url = "http://127.0.0.1:%s/diag?noLogs=1" % read_guts(guts, "rest_port")
- if options.single_node_diag:
- diag_url += "&oneNode=1"
-
-    lookup_cmd = None
-    for cmd in ["dig", "nslookup", "host"]:
-        if shutil.which(cmd) is not None:
-            lookup_cmd = cmd
-            break
-
- lookup_tasks = []
- if lookup_cmd is not None:
- lookup_tasks = [UnixTask("DNS lookup information for %s" % node,
- "%(lookup_cmd)s '%(node)s'" % locals())
- for node in correct_split(read_guts(guts, "nodes"), ",")]
-
- query_tasks = []
- query_port = read_guts(guts, "query_port")
- if query_port:
- def make(statement):
- return make_query_task(statement, user="@",
- password=read_guts(guts, "memcached_pass"),
- port=query_port)
-
- query_tasks = [make("SELECT * FROM system:datastores"),
- make("SELECT * FROM system:namespaces"),
- make("SELECT * FROM system:keyspaces"),
- make("SELECT * FROM system:indexes")]
-
- index_tasks = []
- index_port = read_guts(guts, "indexer_http_port")
- if index_port:
- url = 'http://127.0.0.1:%s/getIndexStatus' % index_port
- index_tasks = [make_curl_task(name="Index definitions are: ",
- user="@", password=read_guts(guts, "memcached_pass"), url=url)]
-
- fts_tasks = []
- fts_port = read_guts(guts, "fts_http_port")
- if fts_port:
- url = 'http://127.0.0.1:%s/api/diag' % fts_port
- fts_tasks = [make_curl_task(name="FTS /api/diag: ",
- user="@", password=read_guts(guts, "memcached_pass"), url=url)]
-
- _tasks = [
- UnixTask("Directory structure",
- ["ls", "-lRai", root]),
- UnixTask("Database directory structure",
- ["ls", "-lRai", dbdir]),
- UnixTask("Index directory structure",
- ["ls", "-lRai", viewdir]),
- UnixTask("couch_dbinfo",
- ["find", dbdir, "-type", "f",
- "-name", "*.couch.*",
- "-exec", "couch_dbinfo", "{}", "+"]),
- LinuxTask("Database directory filefrag info",
- ["find", dbdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
- LinuxTask("Index directory filefrag info",
- ["find", viewdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
- WindowsTask("Database directory structure",
- "dir /s " + winquote_path(dbdir)),
- WindowsTask("Index directory structure",
- "dir /s " + winquote_path(viewdir)),
- WindowsTask("Version file",
- "type " + winquote_path(basedir()) + "\\..\\VERSION.txt"),
- WindowsTask("Manifest file",
- "type " + winquote_path(basedir()) + "\\..\\manifest.txt"),
- WindowsTask("Manifest file",
- "type " + winquote_path(basedir()) + "\\..\\manifest.xml"),
- LinuxTask("Version file", "cat '%s/VERSION.txt'" % root),
- LinuxTask("Manifest file", "cat '%s/manifest.txt'" % root),
- LinuxTask("Manifest file", "cat '%s/manifest.xml'" % root),
- AllOsTask("Couchbase config", "", literal=read_guts(guts, "ns_config")),
- AllOsTask("Couchbase static config", "", literal=read_guts(guts, "static_config")),
- AllOsTask("Raw ns_log", "", literal=read_guts(guts, "ns_log")),
- # TODO: just gather those in python
- WindowsTask("Memcached logs",
- "cd " + winquote_path(read_guts(guts, "memcached_logs_path")) + " && " +
- "for /f %a IN ('dir /od /b memcached.log.*') do type %a",
- log_file="memcached.log"),
- UnixTask("Memcached logs",
- ["sh", "-c", 'cd "$1"; for file in $(ls -tr memcached.log.*); do cat \"$file\"; done', "--", read_guts(guts, "memcached_logs_path")],
- log_file="memcached.log"),
- [WindowsTask("Ini files (%s)" % p,
- "type " + winquote_path(p),
- log_file="ini.log")
- for p in read_guts(guts, "couch_inis").split(";")],
- UnixTask("Ini files",
- ["sh", "-c", 'for i in "$@"; do echo "file: $i"; cat "$i"; done', "--"] + read_guts(guts, "couch_inis").split(";"),
- log_file="ini.log"),
-
- make_curl_task(name="couchbase diags",
- user="@",
- password=read_guts(guts, "memcached_pass"),
- timeout=600,
- url=diag_url,
- log_file="diag.log"),
-
- make_curl_task(name="master events",
- user="@",
- password=read_guts(guts, "memcached_pass"),
- timeout=300,
- url='http://127.0.0.1:%s/diag/masterEvents?o=1' % read_guts(guts, "rest_port"),
- log_file="master_events.log",
- no_header=True),
-
- make_curl_task(name="ale configuration",
- user="@",
- password=read_guts(guts, "memcached_pass"),
- url='http://127.0.0.1:%s/diag/ale' % read_guts(guts, "rest_port"),
- log_file="couchbase.log"),
-
- [AllOsTask("couchbase logs (%s)" % name, "cbbrowse_logs %s" % name,
- addenv=[("REPORT_DIR", read_guts(guts, "log_path"))],
- log_file="ns_server.%s" % name)
- for name in ["debug.log", "info.log", "error.log", "couchdb.log",
- "xdcr.log", "xdcr_errors.log",
- "views.log", "mapreduce_errors.log",
- "stats.log", "babysitter.log", "ssl_proxy.log",
- "reports.log", "xdcr_trace.log", "http_access.log",
- "http_access_internal.log", "ns_couchdb.log",
- "goxdcr.log", "query.log", "projector.log", "indexer.log",
- "fts.log", "metakv.log"]],
-
- [AllOsTask("memcached stats %s" % kind,
-
- flatten(["cbstats", "-a", "127.0.0.1:%s" % read_guts(guts, "memcached_port"), kind, "-b", read_guts(guts, "memcached_admin"), "-p", read_guts(guts, "memcached_pass")]),
- log_file="stats.log",
- timeout=60)
- for kind in ["all", "allocator", "checkpoint", "config",
- "dcp", "dcpagg",
- ["diskinfo", "detail"], ["dispatcher", "logs"],
- "failovers", ["hash", "detail"],
- "kvstore", "kvtimings", "memory",
- "prev-vbucket",
- "runtimes", "scheduler",
- "tap", "tapagg",
- "timings", "uuid",
- "vbucket", "vbucket-details", "vbucket-seqno",
- "warmup", "workload"]],
-
- [AllOsTask("memcached mcstat %s" % kind,
- flatten(["mcstat", "-h", "127.0.0.1:%s" % read_guts(guts, "memcached_port"),
- "-u", read_guts(guts, "memcached_admin"),
- "-P", read_guts(guts, "memcached_pass"), kind]),
- log_file="stats.log",
- timeout=60)
- for kind in ["connections"]],
-
- [AllOsTask("ddocs for %s (%s)" % (bucket, path),
- ["couch_dbdump", path],
- log_file="ddocs.log")
- for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - set(correct_split(read_guts(guts, "memcached_buckets"), ","))
- for path in glob.glob(os.path.join(dbdir, bucket, "master.couch*"))],
- [AllOsTask("replication docs (%s)" % (path),
- ["couch_dbdump", path],
- log_file="ddocs.log")
- for path in glob.glob(os.path.join(dbdir, "_replicator.couch*"))],
-
- [AllOsTask("Couchstore local documents (%s, %s)" % (bucket, os.path.basename(path)),
- ["couch_dbdump", "--local", path],
- log_file="couchstore_local.log")
- for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - set(correct_split(read_guts(guts, "memcached_buckets"), ","))
- for path in glob.glob(os.path.join(dbdir, bucket, "*.couch.*"))],
-
- [UnixTask("moxi stats (port %s)" % port,
- "echo stats proxy | nc 127.0.0.1 %s" % port,
- log_file="stats.log",
- timeout=60)
- for port in correct_split(read_guts(guts, "moxi_ports"), ",")],
-
- [AllOsTask("mctimings",
- ["mctimings",
- "-u", read_guts(guts, "memcached_admin"),
- "-P", read_guts(guts, "memcached_pass"),
- "-h", "127.0.0.1:%s" % read_guts(guts, "memcached_port"),
- "-v"] + stat,
- log_file="stats.log",
- timeout=60)
- for stat in ([], ["subdoc_execute"])],
-
- make_stats_archives_task(guts, initargs_path)
- ]
-
- _tasks = flatten([lookup_tasks, query_tasks, index_tasks, fts_tasks, _tasks])
-
- return _tasks
-
-
-def find_script(name):
- dirs = [basedir(), os.path.join(basedir(), "scripts")]
- for d in dirs:
- path = os.path.join(d, name)
- if os.path.exists(path):
- log("Found %s: %s" % (name, path))
- return path
-
- return None
-
-
-def get_server_guts(initargs_path):
- dump_guts_path = find_script("dump-guts")
-
- if dump_guts_path is None:
- log("Couldn't find dump-guts script. Some information will be missing")
- return {}
-
- escript = exec_name("escript")
- extra_args = os.getenv("EXTRA_DUMP_GUTS_ARGS")
- args = [escript, dump_guts_path, "--initargs-path", initargs_path]
- if extra_args:
- args = args + extra_args.split(";")
- print("Checking for server guts in %s..." % initargs_path)
-    p = subprocess.Popen(args, stdout=subprocess.PIPE)
-    output = p.stdout.read().decode(ENCODING_LATIN1, errors=BACKSLASH_REPLACE)
-    p.wait()
-    # print("args: %s gave rc: %d and:\n\n%s\n" % (args, rc, output))
-    tokens = output.rstrip("\0").split("\0")
- d = {}
- if len(tokens) > 1:
- for i in range(0, len(tokens), 2):
- d[tokens[i]] = tokens[i+1]
- return d
-
-
-def guess_utility(command):
- if isinstance(command, list):
- command = ' '.join(command)
-
- if not command:
- return None
-
- if re.findall(r'[|;&]|\bsh\b|\bsu\b|\bfind\b|\bfor\b', command):
- # something hard to easily understand; let the human decide
- return command
- else:
- return command.split()[0]
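-
-# Examples (illustrative):
-#   guess_utility("cat /proc/mounts")   -> "cat"
-#   guess_utility("dmesg -H || dmesg")  -> "dmesg -H || dmesg" (left for a human)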
-
-
-def dump_utilities(*args, **kwargs):
- specific_platforms = {SolarisTask: 'Solaris',
- LinuxTask: 'Linux',
- WindowsTask: 'Windows',
- MacOSXTask: 'Mac OS X'}
- platform_utils = dict((name, set()) for name in list(specific_platforms.values()))
-
- class FakeOptions(object):
- def __getattr__(self, name):
- return None
-
-    tasks = make_os_tasks(["sync_gateway"]) + make_product_task({}, "", FakeOptions())
-
- for task in tasks:
- utility = guess_utility(task.command)
- if utility is None:
- continue
-
- for (platform, name) in list(specific_platforms.items()):
- if isinstance(task, platform):
- platform_utils[name].add(utility)
-
- print("This is an autogenerated, possibly incomplete and flawed list of utilites used by cbcollect_info")
-
- for (name, utilities) in sorted(list(platform_utils.items()), key=lambda x: x[0]):
- print("\n%s:" % name)
-
- for utility in sorted(utilities):
- print(" - %s" % utility)
-
- sys.exit(0)
-
-
-def setup_stdin_watcher():
- def _in_thread():
- sys.stdin.readline()
- AltExit.exit(2)
- th = threading.Thread(target=_in_thread)
-    th.daemon = True
- th.start()
-
-
-class CurlKiller:
- def __init__(self, p):
- self.p = p
-
- def cleanup(self):
- if self.p is not None:
- print("Killing curl...")
- os.kill(self.p.pid, signal.SIGKILL)
- print("done")
-
- def disarm(self):
- self.p = None
-
-
-def do_upload_and_exit(path, url, proxy):
-
- f = open(path, 'rb')
-
- # mmap the file to reduce the amount of memory required (see bit.ly/2aNENXC)
- filedata = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
-
- # Get proxies from environment/system
- proxy_handler = urllib.request.ProxyHandler(urllib.request.getproxies())
- if proxy != "":
- # unless a proxy is explicitly passed, then use that instead
- proxy_handler = urllib.request.ProxyHandler({'https': proxy, 'http': proxy})
-
- opener = urllib.request.build_opener(proxy_handler)
- request = urllib.request.Request(url, data=filedata.read(), method='PUT')
- request.add_header(str('Content-Type'), str('application/zip'))
-
-    exit_code = 0
-    try:
-        response = opener.open(request)
-        if response.getcode() == 200:
-            log('Done uploading')
-        else:
-            raise Exception('Error uploading, expected status code 200, got status code: {0}'.format(response.getcode()))
-    except Exception:
-        log(traceback.format_exc())
-        exit_code = 1
-
- filedata.close()
- f.close()
-
- sys.exit(exit_code)
-
-
-def parse_host(host):
- url = urllib.parse.urlsplit(host)
- if not url.scheme:
- url = urllib.parse.urlsplit('https://' + host)
-
- return url.scheme, url.netloc, url.path
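-
-# Examples (illustrative):
-#   parse_host("uploads.couchbase.com")       -> ("https", "uploads.couchbase.com", "")
-#   parse_host("http://example.com/collects") -> ("http", "example.com", "/collects")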
-
-
-def generate_upload_url(parser, options, zip_filename):
- upload_url = None
- if options.upload_host:
- if not options.upload_customer:
- parser.error("Need --customer when --upload-host is given")
-
- scheme, netloc, path = parse_host(options.upload_host)
-
- customer = urllib.parse.quote(options.upload_customer)
- fname = urllib.parse.quote(os.path.basename(zip_filename))
- if options.upload_ticket:
- full_path = '%s/%s/%d/%s' % (path, customer, options.upload_ticket, fname)
- else:
- full_path = '%s/%s/%s' % (path, customer, fname)
-
- upload_url = urllib.parse.urlunsplit((scheme, netloc, full_path, '', ''))
- log("Will upload collected .zip file into %s" % upload_url)
- return upload_url
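-
-# Example (illustrative; host, customer, and ticket values are assumptions):
-# with --upload-host=uploads.couchbase.com, customer "Acme", ticket 123456 and
-# zip_filename "sginfo.zip", the generated URL is
-# https://uploads.couchbase.com/Acme/123456/sginfo.zip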
-
-
-def check_ticket(option, opt, value):
-    if re.match(r'^\d{1,7}$', value):
- return int(value)
- else:
- raise optparse.OptionValueError(
- "option %s: invalid ticket number: %r" % (opt, value))
-
-
-class CbcollectInfoOptions(optparse.Option):
- from copy import copy
-
- TYPES = optparse.Option.TYPES + ("ticket",)
- TYPE_CHECKER = copy(optparse.Option.TYPE_CHECKER)
- TYPE_CHECKER["ticket"] = check_ticket
-
-
-def find_primary_addr(default=None):
- s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
- try:
- try:
- s.connect(("8.8.8.8", 56))
- addr, port = s.getsockname()
- return addr
- except socket.error:
- return default
- finally:
- s.close()
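-
-# Note: connect() on a UDP socket sends no packets; it only makes the kernel
-# choose the local interface that would route to 8.8.8.8, whose address
-# getsockname() then reports.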
-
-
-def exec_name(name):
- if sys.platform == 'win32':
- name += ".exe"
- return name