diff --git a/README.md b/README.md
index 5d43f53..ee60e09 100644
--- a/README.md
+++ b/README.md
@@ -66,11 +66,11 @@ GLOBAL OPTIONS:
    --zone-id value                  The zone ID of the zone you are requesting logs for
    --zone-name value                The name of the zone you are requesting logs for. logshare will automatically fetch the ID of this zone from the Cloudflare API
    --ray-id value                   The ray ID to request logs from (instead of a timestamp)
-   --start-time value               The timestamp (in Unix seconds) to request logs from. Defaults to 30 minutes behind the current time (default: 1511219860)
-   --end-time value                 The timestamp (in Unix seconds) to request logs to. Defaults to 20 minutes behind the current time (default: 1511220460)
+   --start-time value               The timestamp (in Unix seconds) to request logs from. Defaults to 30 minutes behind the current time (default: 1515607083)
+   --end-time value                 The timestamp (in Unix seconds) to request logs to. Defaults to 20 minutes behind the current time (default: 1515607683)
    --count value                    The number (count) of logs to retrieve. Pass '-1' to retrieve all logs for the given time period (default: 1)
-   --by-received                    (default behaviour) Retrieve logs by the processing time on Cloudflare. This mode allows you to fetch all available logs vs. based on the log timestamps themselves.
-   --legacy-endpoint                (deprecated) Retrieve logs using the 'legacy' endpoint, where results are returned by log timestamp.
+   --sample value                   The sampling rate from 0.1 (10%) to 0.9 (90%) to use when retrieving logs (default: 0)
+   --timestamp-format value         The timestamp format to use in logs: one of 'unix', 'unixnano', or 'rfc3339' (default: "unixnano")
    --fields value                   Select specific fields to retrieve in the log response. Pass a comma-separated list to fields to specify multiple fields.
    --list-fields                    List the available log fields for use with the --fields flag
    --google-storage-bucket value    Full URI to a Google Cloud Storage Bucket to upload logs to
@@ -84,18 +84,25 @@ In order to make retrieving logs more straightforward, you can provide the zone
 `--zone-name=` option, and logshare-cli will fetch the relevant zone ID for this zone before
 retrieving logs.

-
 ### Useful Tips

 Although `logshare-cli` can be used in multiple ways, and for ingesting logs into a larger system, a
 common use-case is ad-hoc analysis of logs when troubleshooting or analyzing traffic. Here are a few
 examples that leverage [`jq`](https://stedolan.github.io/jq/) to parse log output.

+#### Timestamps & Sampling
+
+By default, the Log Share endpoint provides logs with Unix nanosecond timestamps and the full set of available logs.
+
+* Pass the `timestamp-format=` flag with one of `unix`, `unixnano` (default), or `rfc3339` to customize the timestamps.
+* Pass the `sample=` flag with a value between `0.1` (10%) and `0.9` (90%) to retrieve a random sample of logs.
+
 #### Distribution of Edge (client-facing) Response Status Codes

 ```
 $ logshare-cli --api-key= --api-email= --zone-name=example.com --start-time=1453307871 --count=20000 | jq '.[] | .EdgeResponseStatus empty' | sort -rn | uniq -c | sort -rn
 ```
+
 ```
 35954 200
 4968 301
@@ -117,6 +124,7 @@ $ logshare-cli --api-key= --api-email= --zone-name=example.com --sta
 ```
 $ logshare-cli --api-key= --api-email= --zone-name=example.com --list-fields | jq
 ```
+
 ```
 {
   "CacheCacheStatus": "unknown | miss | expired | updating | stale | hit | ignored | bypass | revalidated",
@@ -162,7 +170,7 @@ $ logshare-cli --api-key= --api-email= --zone-name=example.com --lis
 `logshare-cli` can be used to upload logs directly to GCS.
 In order to do so both `--google-storage-bucket` and `--google-project-id` must be provided. This will reroute log output to a file named `cloudflare_els__.json` in the bucket/project selected.
 The bucket will be created if it was not already, but the project must already exist.

 ```
-logshare-cli --api-key= --api-email= --zone-name=example.com --start-time 1502438905
+logshare-cli --api-key= --api-email= --zone-name=example.com --start-time 1502438905 --count 500
 --google-storage-bucket=my-bucket --google-project-id=my-project-id
 ```
diff --git a/cmd/logshare-cli/logshare-cli b/cmd/logshare-cli/logshare-cli
new file mode 100755
index 0000000..da61b95
Binary files /dev/null and b/cmd/logshare-cli/logshare-cli differ
diff --git a/cmd/logshare-cli/main.go b/cmd/logshare-cli/main.go
index ec91d1d..575f434 100644
--- a/cmd/logshare-cli/main.go
+++ b/cmd/logshare-cli/main.go
@@ -38,7 +38,7 @@ func main() {
 	}
 }

-func setupGoogleStr(projectId string, bucketName string, filename string) (*gcs.Writer, error) {
+func setupGoogleStr(projectID string, bucketName string, filename string) (*gcs.Writer, error) {
 	gCtx := context.Background()
 	gClient, error := gcs.NewClient(gCtx)

@@ -48,7 +48,7 @@ func setupGoogleStr(projectId string, bucketName string, filename string) (*gcs.

 	gBucket := gClient.Bucket(bucketName)

-	if error = gBucket.Create(gCtx, projectId, nil); strings.Contains(error.Error(), "409") {
+	if error = gBucket.Create(gCtx, projectID, nil); strings.Contains(error.Error(), "409") {
 		log.Printf("Bucket %v already exists.\n", bucketName)
 		error = nil
 	} else if error != nil {
@@ -82,7 +82,7 @@ func run(conf *config) func(c *cli.Context) error {

 		if conf.googleStorageBucket != "" {
 			fileName := "cloudflare_els_" + conf.zoneID + "_" + strconv.Itoa(int(time.Now().Unix())) + ".json"
-			gcsWriter, err := setupGoogleStr(conf.googleProjectId, conf.googleStorageBucket, fileName)
+			gcsWriter, err := setupGoogleStr(conf.googleProjectID, conf.googleStorageBucket, fileName)
 			if err != nil {
 				return err
 			}
@@ -94,10 +94,11 @@ func run(conf *config) func(c *cli.Context) error {
 			conf.apiKey,
 			conf.apiEmail,
 			&logshare.Options{
-				// Pass the inverse of the legacy flag to invoke the old behaviour.
-				ByReceived: !conf.legacy,
-				Fields:     conf.fields,
-				Dest:       outputWriter,
+				Fields:          conf.fields,
+				Dest:            outputWriter,
+				ByReceived:      true,
+				Sample:          conf.sample,
+				TimestampFormat: conf.timestampFormat,
 			})
 		if err != nil {
 			return err
@@ -112,12 +113,6 @@ func run(conf *config) func(c *cli.Context) error {
 			if err != nil {
 				return errors.Wrap(err, "failed to fetch field names")
 			}
-		} else if conf.rayID != "" {
-			meta, err = client.GetFromRayID(
-				conf.zoneID, conf.rayID, conf.endTime, conf.count)
-			if err != nil {
-				return errors.Wrap(err, "failed to fetch via rayID")
-			}
 		} else {
 			meta, err = client.GetFromTimestamp(
 				conf.zoneID, conf.startTime, conf.endTime, conf.count)
@@ -139,16 +134,15 @@ func parseFlags(conf *config, c *cli.Context) error {
 	conf.apiEmail = c.String("api-email")
 	conf.zoneID = c.String("zone-id")
 	conf.zoneName = c.String("zone-name")
-	conf.rayID = c.String("ray-id")
 	conf.startTime = c.Int64("start-time")
 	conf.endTime = c.Int64("end-time")
 	conf.count = c.Int("count")
-	conf.byReceived = c.Bool("by-received")
-	conf.legacy = c.Bool("legacy-endpoint")
+	conf.timestampFormat = c.String("timestamp-format")
+	conf.sample = c.Float64("sample")
 	conf.fields = c.StringSlice("fields")
 	conf.listFields = c.Bool("list-fields")
 	conf.googleStorageBucket = c.String("google-storage-bucket")
-	conf.googleProjectId = c.String("google-project-id")
+	conf.googleProjectID = c.String("google-project-id")

 	return conf.Validate()
 }
@@ -156,18 +150,17 @@ func parseFlags(conf *config, c *cli.Context) error {
 type config struct {
 	apiKey              string
 	apiEmail            string
-	rayID               string
 	zoneID              string
 	zoneName            string
 	startTime           int64
 	endTime             int64
 	count               int
-	byReceived          bool
-	legacy              bool
+	timestampFormat     string
+	sample              float64
 	fields              []string
 	listFields          bool
 	googleStorageBucket string
-	googleProjectId     string
+	googleProjectID     string
 }

 func (conf *config) Validate() error {
@@ -180,15 +173,11 @@ func (conf *config) Validate() error {
 		return errors.New("zone-name OR zone-id must be set")
 	}

-	if conf.legacy && conf.byReceived {
-		return errors.New("you must specify either --legacy-endpoint or --by-received (the default), not both. The default mode is --by-received")
-	}
-
-	if len(conf.fields) > 0 && !conf.byReceived {
-		return errors.New("specifying --fields is only supported when using the --by-received endpoint")
+	if conf.sample != 0.0 && (conf.sample < 0.1 || conf.sample > 0.9) {
+		return errors.New("sample must be between 0.1 and 0.9")
 	}

-	if (conf.googleStorageBucket == "") != (conf.googleProjectId == "") {
+	if (conf.googleStorageBucket == "") != (conf.googleProjectID == "") {
 		return errors.New("Both google-storage-bucket and google-project-id must be provided to upload to Google Storage")
 	}

@@ -231,13 +220,15 @@ var flags = []cli.Flag{
 		Value: 1,
 		Usage: "The number (count) of logs to retrieve. Pass '-1' to retrieve all logs for the given time period",
 	},
-	cli.BoolFlag{
-		Name:  "by-received",
-		Usage: "(default behaviour) Retrieve logs by the processing time on Cloudflare. This mode allows you to fetch all available logs vs. based on the log timestamps themselves.",
+	cli.Float64Flag{
+		Name:  "sample",
+		Value: 0.0,
+		Usage: "The sampling rate from 0.1 (10%) to 0.9 (90%) to use when retrieving logs",
 	},
-	cli.BoolFlag{
-		Name:  "legacy-endpoint",
-		Usage: "(deprecated) Retrieve logs using the 'legacy' endpoint, where results are returned by log timestamp.",
+	cli.StringFlag{
+		Name:  "timestamp-format",
+		Value: "unixnano",
+		Usage: "The timestamp format to use in logs: one of 'unix', 'unixnano', or 'rfc3339'",
 	},
 	cli.StringSliceFlag{
 		Name: "fields",
diff --git a/logshare.go b/logshare.go
index 6027adf..ffc8f28 100644
--- a/logshare.go
+++ b/logshare.go
@@ -5,7 +5,9 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"net/url"
 	"os"
+	"strconv"
 	"strings"
 	"time"

@@ -18,17 +20,25 @@ const (
 	byReceived = "received"
 )

+const (
+	unix     = "unix"
+	unixNano = "unixnano"
+	rfc3339  = "rfc3339"
+)
+
 // Client holds the current API credentials & HTTP client configuration. Client
 // should not be modified concurrently.
 type Client struct {
-	endpoint   string
-	apiKey     string
-	apiEmail   string
-	byReceived bool
-	fields     []string
-	httpClient *http.Client
-	dest       io.Writer
-	headers    http.Header
+	endpoint        string
+	apiKey          string
+	apiEmail        string
+	byReceived      bool
+	sample          float64
+	timestampFormat string
+	fields          []string
+	httpClient      *http.Client
+	dest            io.Writer
+	headers         http.Header
 }

 // Options for configuring log retrieval requests.
@@ -41,6 +51,10 @@ type Options struct {
 	Dest io.Writer
 	// Fetch logs by the processing/received timestamp
 	ByReceived bool
+	// Which timestamp format to use: one of "unix", "unixnano", "rfc3339"
+	TimestampFormat string
+	// Whether to only retrieve a sample of logs (0.1 to 0.9)
+	Sample float64
 	// The fields to return in the log responses
 	Fields []string
 }
@@ -66,7 +80,8 @@ func New(apiKey string, apiEmail string, options *Options) (*Client, error) {
 		return nil, errors.New("apiEmail cannot be empty")
 	}

-	var byReceived bool
+	// Default to the received endpoint.
+	var byReceived = true
 	if options != nil {
 		byReceived = options.ByReceived
 	}
@@ -81,79 +96,94 @@ func New(apiKey string, apiEmail string, options *Options) (*Client, error) {
 		byReceived: byReceived,
 	}

-	if options != nil && options.Fields != nil {
-		client.fields = options.Fields
-	}
+	if options != nil {
+		client.timestampFormat = options.TimestampFormat
+		client.sample = options.Sample

-	if options != nil && options.Dest != nil {
-		client.dest = options.Dest
+		if options.Dest != nil {
+			client.dest = options.Dest
+		}
+
+		if options.Fields != nil {
+			client.fields = options.Fields
+		}
 	}

 	return client, nil
 }

-func (c *Client) buildURL(zoneID string) string {
-	endpoint := byRequest
-	if c.byReceived {
-		endpoint = byReceived
+func (c *Client) buildURL(zoneID string, params url.Values) (*url.URL, error) {
+	endpoint := byReceived
+	if !c.byReceived {
+		endpoint = byRequest
 	}

-	return fmt.Sprintf("%s/zones/%s/logs/%s", c.endpoint, zoneID, endpoint)
-}
-
-func (c *Client) addFieldParams(url string) string {
-	// The fields param is only supported on the Logpull endpoint
-	if !c.byReceived || len(c.fields) < 1 {
-		return url
+	u, err := url.Parse(
+		fmt.Sprintf("%s/zones/%s/logs/%s",
+			c.endpoint,
+			zoneID,
+			endpoint,
+		),
+	)
+	if err != nil {
+		return nil, err
 	}

-	return url + "&fields=" + strings.Join(c.fields, ",")
-}
-
-// GetFromRayID fetches logs for the given rayID, or starting at the given rayID
-// if a non-zero end timestamp is provided.
-func (c *Client) GetFromRayID(zoneID string, rayID string, end int64, count int) (*Meta, error) {
-	url := fmt.Sprintf("%s?start_id=%s", c.buildURL(zoneID), rayID)
-
-	if end > 0 {
-		url += fmt.Sprintf("&end=%d", end)
+	if c.byReceived && len(c.fields) > 0 {
+		params.Set("fields", strings.Join(c.fields, ","))
 	}

-	if count > 0 {
-		url += fmt.Sprintf("&count=%d", count)
+	if c.sample != 0.0 {
+		params.Set("sample", strconv.FormatFloat(c.sample, 'f', 1, 64))
 	}

-	url = c.addFieldParams(url)
+	if c.timestampFormat != "" {
+		params.Set("timestamps", c.timestampFormat)
+	}

-	return c.request(url)
+	u.RawQuery = params.Encode()
+	return u, nil
 }

 // GetFromTimestamp fetches logs between the start and end timestamps provided,
 // (up to 'count' logs).
 func (c *Client) GetFromTimestamp(zoneID string, start int64, end int64, count int) (*Meta, error) {
-	url := fmt.Sprintf("%s?start=%d", c.buildURL(zoneID), start)
+	params := url.Values{}
+	params.Set("start", strconv.FormatInt(start, 10))

 	if end > 0 {
-		url += fmt.Sprintf("&end=%d", end)
+		params.Set("end", strconv.FormatInt(end, 10))
 	}

 	if count > 0 {
-		url += fmt.Sprintf("&count=%d", count)
+		params.Set("count", strconv.Itoa(count))
 	}

-	url = c.addFieldParams(url)
+	u, err := c.buildURL(zoneID, params)
+	if err != nil {
+		return nil, err
+	}

-	return c.request(url)
+	return c.request(u)
 }

 // FetchFieldNames fetches the names of the available log fields.
 func (c *Client) FetchFieldNames(zoneID string) (*Meta, error) {
-	url := fmt.Sprintf("%s/zones/%s/logs/received/fields", c.endpoint, zoneID)
-	return c.request(url)
+	u, err := url.Parse(
+		fmt.Sprintf(
+			"%s/zones/%s/logs/received/fields",
+			c.endpoint,
+			zoneID,
+		),
+	)
+	if err != nil {
+		return nil, err
+	}
+	return c.request(u)
 }

-func (c *Client) request(url string) (*Meta, error) {
-	req, err := http.NewRequest("GET", url, nil)
+func (c *Client) request(u *url.URL) (*Meta, error) {
+	req, err := http.NewRequest("GET", u.String(), nil)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to create a request object")
 	}
@@ -174,7 +204,7 @@ func (c *Client) request(url string) (*Meta, error) {
 	meta := &Meta{
 		StatusCode: resp.StatusCode,
 		Duration:   makeTimestamp() - start,
-		URL:        url,
+		URL:        u.String(),
 	}

 	if resp.StatusCode < 200 || resp.StatusCode > 299 {
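For anyone exercising the reworked library surface directly rather than through logshare-cli, here is a minimal, hypothetical sketch of calling the new `Options` from Go. The `New` and `GetFromTimestamp` signatures and the `Sample`/`TimestampFormat`/`ByReceived` fields are taken from the hunks above; the import path, credentials, zone ID, and field names are assumptions for illustration only, not part of this patch.

```go
package main

import (
	"log"
	"os"
	"time"

	// Assumed import path for the package patched above.
	"github.com/cloudflare/logshare"
)

func main() {
	// Hypothetical credentials; logshare-cli reads these from --api-key / --api-email.
	apiKey := os.Getenv("CF_API_KEY")
	apiEmail := os.Getenv("CF_API_EMAIL")

	client, err := logshare.New(apiKey, apiEmail, &logshare.Options{
		Dest:            os.Stdout, // write raw log lines to stdout
		ByReceived:      true,      // use the 'received' (Logpull) endpoint
		Sample:          0.1,       // sent as the 'sample' query parameter (roughly a 10% sample)
		TimestampFormat: "rfc3339", // one of "unix", "unixnano", "rfc3339"; sent as 'timestamps'
		Fields:          []string{"RayID", "EdgeResponseStatus"},
	})
	if err != nil {
		log.Fatal(err)
	}

	// Fetch logs from 30 to 20 minutes ago for a hypothetical zone ID; -1 means no count limit.
	now := time.Now()
	start := now.Add(-30 * time.Minute).Unix()
	end := now.Add(-20 * time.Minute).Unix()

	meta, err := client.GetFromTimestamp("023e105f4ecef8ad9ca31a8372d0c353", start, end, -1)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("GET %s -> HTTP %d", meta.URL, meta.StatusCode)
}
```

The sketch leans on the design choice visible in `buildURL`: query construction now goes through `url.Values`, so `sample`, `timestamps`, `fields`, and the time-range parameters are encoded uniformly instead of being appended to the URL string by hand.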