Skip to content

Commit

Permalink
Expose a simpler download API
Browse files Browse the repository at this point in the history
  • Loading branch information
horgh committed Mar 25, 2024
1 parent 2697024 commit 54897d4
Show file tree
Hide file tree
Showing 8 changed files with 231 additions and 124 deletions.
12 changes: 9 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

* `geoipupdate` now supports retrying on more types of errors
such as HTTP2 INTERNAL_ERROR.
* `HTTPReader` no longer retries on HTTP errors and therefore
`retryFor` was removed from `NewHTTPReader`.
* Now `geoipupdate` doesn't require the user to specify the config file
even if all the other arguments are set via the environment variables.
Reported by jsf84ksnf. GitHub #284.
Expand All @@ -15,9 +13,17 @@
a database edition.
* `/geoip/databases/{edition-id}/download` which is responsible for downloading
the content of a database edition. This new endpoint redirects downloads to R2
presigned URLs, so systems running geoipupdate need to be able to reach
presigned URLs, so systems running `geoipupdate` need to be able to
reach
`mm-prod-geoip-databases.a2649acb697e2c09b632799562c076f2.r2.cloudflarestorage.com`
in addition to `updates.maxmind.com`.
* BREAKING CHANGE: The public package API has been redesigned. The previous
API was not easy to use and had become a maintenance burden. We now
expose a `Client` at `github.com/maxmind/geoipupdate/client` with a
`Download()` method. The intention is to expose less of the `geoipupdate`
internals and provide a simpler and easier to use package. Many
previously exposed methods and types are now either internal only or have
been removed.

## 6.1.0 (2024-01-09)

Expand Down
73 changes: 50 additions & 23 deletions client/client.go
Original file line number Diff line number Diff line change
@@ -1,38 +1,65 @@
// Package client is a client for downloading GeoIP2 and GeoLite2 MMDB
// databases.
package client

import (
"fmt"
"net/http"
)

// HTTPReader is a Reader that uses an HTTP client to retrieve
// databases.
type HTTPReader struct {
// client is an HTTP client responsible for fetching database updates.
client *http.Client
// path is the request path.
path string
// accountID is used for request auth.
accountID int
// licenseKey is used for request auth.
// Client downloads GeoIP2 and GeoLite2 MMDB databases.
//
// After creation, it is valid for concurrent use.
type Client struct {
accountID int
endpoint string
httpClient *http.Client
licenseKey string
// verbose turns on/off debug logs.
verbose bool
}

// NewHTTPReader creates a Reader that downloads database updates via
// HTTP.
func NewHTTPReader(
path string,
// Option is a functional option for configuring a Client. Options are passed
// to New, which applies them in order after setting its defaults.
type Option func(*Client)

// WithEndpoint returns an Option that overrides the base endpoint the Client
// sends its requests to. When this option is not supplied, the Client uses
// https://updates.maxmind.com.
func WithEndpoint(endpoint string) Option {
	setEndpoint := func(client *Client) {
		client.endpoint = endpoint
	}
	return setEndpoint
}

// WithHTTPClient returns an Option that replaces the HTTP client the Client
// uses to perform its requests. When this option is not supplied, the Client
// uses http.DefaultClient.
func WithHTTPClient(httpClient *http.Client) Option {
	setHTTPClient := func(client *Client) {
		client.httpClient = httpClient
	}
	return setHTTPClient
}

// New creates a Client.
func New(
accountID int,
licenseKey string,
verbose bool,
httpClient *http.Client,
) *HTTPReader {
return &HTTPReader{
client: httpClient,
path: path,
options ...Option,
) (Client, error) {
if accountID <= 0 {
return Client{}, fmt.Errorf("invalid account ID: %d", accountID)
}

if licenseKey == "" {
return Client{}, fmt.Errorf("invalid license key: %s", licenseKey)
}

c := Client{
accountID: accountID,
endpoint: "https://updates.maxmind.com",
httpClient: http.DefaultClient,
licenseKey: licenseKey,
verbose: verbose,
}

for _, opt := range options {
opt(&c)
}

return c, nil
}
137 changes: 86 additions & 51 deletions client/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"errors"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strconv"
"strings"
"time"
Expand All @@ -17,81 +17,124 @@ import (
"github.com/maxmind/geoipupdate/v6/internal/vars"
)

// Read attempts to fetch database updates for a specific editionID.
// It takes an editionID and its previously downloaded hash if available
// as arguments and returns a ReadResult struct as a response.
// It's the responsibility of the Writer to close the io.ReadCloser
// included in the response after consumption.
func (r *HTTPReader) Read(ctx context.Context, editionID, hash string) (*ReadResult, error) {
result, err := r.get(ctx, editionID, hash)
if err != nil {
return nil, fmt.Errorf("getting update for %s: %w", editionID, err)
}

return result, nil
// DownloadResponse describes the result of a Download call.
type DownloadResponse struct {
// LastModified is the date that the database was last modified, as parsed
// from the Last-Modified header of the download response. It will only be
// set if UpdateAvailable is true.
LastModified time.Time

// MD5 is the string representation of the MD5 sum of the new database. It
// will only be set if UpdateAvailable is true.
MD5 string

// Reader can be read to access the database itself. It will only contain a
// database if UpdateAvailable is true; otherwise it is an empty reader.
//
// If the Download call does not return an error, Reader will always be
// non-nil.
//
// If UpdateAvailable is true, the caller must read Reader to completion and
// close it.
Reader io.ReadCloser

// UpdateAvailable is true if there is an update available for download. It
// will be false if the MD5 used in the Download call matches what the server
// currently has.
UpdateAvailable bool
}

const downloadEndpoint = "%s/geoip/databases/%s/download?"

// get makes an http request to fetch updates for a specific editionID if any.
func (r *HTTPReader) get(
// Download attempts to download the edition.
//
// The editionID parameter is a valid database edition ID, such as
// "GeoIP2-City".
//
// The MD5 parameter is a string representation of the MD5 sum of the database
// MMDB file you have previously downloaded. If you don't yet have one
// downloaded, this can be "". This is used to know if an update is available
// and avoid consuming resources if there is not.
//
// If the current MD5 checksum matches what the server currently has, no
// download is performed.
func (c Client) Download(
ctx context.Context,
editionID string,
hash string,
) (result *ReadResult, err error) {
edition, err := r.getMetadata(ctx, editionID)
editionID,
md5 string,
) (DownloadResponse, error) {
metadata, err := c.getMetadata(ctx, editionID)
if err != nil {
return nil, err
return DownloadResponse{}, err
}

if edition.MD5 == hash {
if r.verbose {
log.Printf("No new updates available for %s", editionID)
}
return &ReadResult{EditionID: editionID, OldHash: hash, NewHash: hash}, nil
if metadata.MD5 == md5 {
return DownloadResponse{
Reader: io.NopCloser(strings.NewReader("")),
UpdateAvailable: false,
}, nil
}

reader, modifiedTime, err := c.download(ctx, editionID, metadata.Date)
if err != nil {
return DownloadResponse{}, err
}

date := strings.ReplaceAll(edition.Date, "-", "")
return DownloadResponse{
LastModified: modifiedTime,
MD5: metadata.MD5,
Reader: reader,
UpdateAvailable: true,
}, nil
}

const downloadEndpoint = "%s/geoip/databases/%s/download?"

func (c *Client) download(
ctx context.Context,
editionID,
date string,
) (io.ReadCloser, time.Time, error) {
date = strings.ReplaceAll(date, "-", "")

params := url.Values{}
params.Add("date", date)
params.Add("suffix", "tar.gz")

escapedEdition := url.PathEscape(edition.EditionID)
requestURL := fmt.Sprintf(downloadEndpoint, r.path, escapedEdition) + params.Encode()
escapedEdition := url.PathEscape(editionID)
requestURL := fmt.Sprintf(downloadEndpoint, c.endpoint, escapedEdition) + params.Encode()

req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil)
if err != nil {
return nil, fmt.Errorf("creating download request: %w", err)
return nil, time.Time{}, fmt.Errorf("creating download request: %w", err)
}
req.Header.Add("User-Agent", "geoipupdate/"+vars.Version)
req.SetBasicAuth(strconv.Itoa(r.accountID), r.licenseKey)
req.SetBasicAuth(strconv.Itoa(c.accountID), c.licenseKey)

response, err := r.client.Do(req)
response, err := c.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("performing download request: %w", err)
return nil, time.Time{}, fmt.Errorf("performing download request: %w", err)
}
// It is safe to close the response body reader as it wouldn't be
// consumed in case this function returns an error.
defer func() {
if err != nil {
// TODO(horgh): Should we fully consume the body?
response.Body.Close()
}
}()

if response.StatusCode != http.StatusOK {
// TODO(horgh): Should we fully consume the body?
//nolint:errcheck // we are already returning an error.
buf, _ := io.ReadAll(io.LimitReader(response.Body, 256))
httpErr := internal.HTTPError{
Body: string(buf),
StatusCode: response.StatusCode,
}
return nil, fmt.Errorf("unexpected HTTP status code: %w", httpErr)
return nil, time.Time{}, fmt.Errorf("unexpected HTTP status code: %w", httpErr)
}

gzReader, err := gzip.NewReader(response.Body)
if err != nil {
return nil, fmt.Errorf("encountered an error creating GZIP reader: %w", err)
return nil, time.Time{}, fmt.Errorf("encountered an error creating GZIP reader: %w", err)
}
defer func() {
if err != nil {
Expand All @@ -105,37 +148,29 @@ func (r *HTTPReader) get(
for {
header, err := tarReader.Next()
if err == io.EOF {
return nil, errors.New("tar archive does not contain an mmdb file")
return nil, time.Time{}, errors.New("tar archive does not contain an mmdb file")
}
if err != nil {
return nil, fmt.Errorf("reading tar archive: %w", err)
return nil, time.Time{}, fmt.Errorf("reading tar archive: %w", err)
}

if strings.HasSuffix(header.Name, ".mmdb") {
break
}
}

modifiedAt, err := parseTime(response.Header.Get("Last-Modified"))
lastModified, err := parseTime(response.Header.Get("Last-Modified"))
if err != nil {
return nil, fmt.Errorf("reading Last-Modified header: %w", err)
}

if r.verbose {
log.Printf("Updates available for %s", editionID)
return nil, time.Time{}, fmt.Errorf("reading Last-Modified header: %w", err)
}

return &ReadResult{
reader: editionReader{
return editionReader{
Reader: tarReader,
gzCloser: gzReader,
responseCloser: response.Body,
},
EditionID: editionID,
OldHash: hash,
NewHash: edition.MD5,
ModifiedAt: modifiedAt,
}, nil
lastModified,
nil
}

// parseTime parses a string representation of a time into time.Time according to the
Expand Down
16 changes: 7 additions & 9 deletions client/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strconv"
Expand All @@ -24,24 +23,23 @@ type metadata struct {
MD5 string `json:"md5"`
}

func (r *HTTPReader) getMetadata(ctx context.Context, editionID string) (*metadata, error) {
func (c *Client) getMetadata(
ctx context.Context,
editionID string,
) (*metadata, error) {
params := url.Values{}
params.Add("edition_id", editionID)

metadataRequestURL := fmt.Sprintf(metadataEndpoint, r.path) + params.Encode()

if r.verbose {
log.Printf("Requesting metadata for %s: %s", editionID, metadataRequestURL)
}
metadataRequestURL := fmt.Sprintf(metadataEndpoint, c.endpoint) + params.Encode()

req, err := http.NewRequestWithContext(ctx, http.MethodGet, metadataRequestURL, nil)
if err != nil {
return nil, fmt.Errorf("creating metadata request: %w", err)
}
req.Header.Add("User-Agent", "geoipupdate/"+vars.Version)
req.SetBasicAuth(strconv.Itoa(r.accountID), r.licenseKey)
req.SetBasicAuth(strconv.Itoa(c.accountID), c.licenseKey)

response, err := r.client.Do(req)
response, err := c.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("performing metadata request: %w", err)
}
Expand Down
Loading

0 comments on commit 54897d4

Please sign in to comment.