Skip to content

Commit

Permalink
[ottl/pkg] Add support for locale in the Time converter (open-telemet…
Browse files Browse the repository at this point in the history
…ry#35107)

**Description:** <Describe what has changed.>
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue.
Ex. Adding a feature - Explain what this achieves.-->

Added support for locale in the `Time` converter, so it can parse
timestamps written in non-English languages.

The new `locale` parameter is optional and can be specified as follows:
`Time("Febrero 25 lunes, 2002, 02:03:04 p.m.", "%B %d %A, %Y, %r",
"America/New_York", "es-ES")`

The value must be a well-formed BCP-47 language tag, and a known
[CLDR](https://cldr.unicode.org) v45 locale.

**Link to tracking Issue:**
open-telemetry#32978

**Testing:** Unit tests

**Documentation:** ottl/README was updated to include the new optional
`locale` parameter.
  • Loading branch information
edmocosta authored Sep 20, 2024
1 parent ed2610f commit d98a4f6
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 29 deletions.
27 changes: 27 additions & 0 deletions .chloggen/ottl_time_func_locale_support.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Added support for locale in the Time converter

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [32978]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
53 changes: 38 additions & 15 deletions internal/coreinternal/timeutils/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package timeutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/timeutils"

import (
"errors"
"fmt"
"regexp"
"strings"
Expand All @@ -28,27 +29,15 @@ func ParseStrptime(layout string, value any, location *time.Location) (time.Time
return ParseGotime(goLayout, value, location)
}

// ParseLocalizedStrptime is like ParseLocalizedGotime, but instead of using the native Go time layout,
// it uses the ctime-like format.
// The layout is first converted to a Go layout with strptime.ToNative; the value is then
// parsed by ParseLocalizedGotime using that converted layout.
// The language argument must be a well-formed BCP 47 language tag (e.g.: "en", "en-US"), and
// a known CLDR locale.
// An error is returned if the layout cannot be converted, or if ParseLocalizedGotime fails.
func ParseLocalizedStrptime(layout string, value any, location *time.Location, language string) (time.Time, error) {
	goLayout, err := strptime.ToNative(layout)
	if err != nil {
		return time.Time{}, err
	}

	// Value conversion, translation and parsing all live in ParseLocalizedGotime,
	// so the two entry points cannot drift apart.
	return ParseLocalizedGotime(goLayout, value, location, language)
}

func GetLocation(location *string, layout *string) (*time.Location, error) {
Expand All @@ -69,6 +58,24 @@ func GetLocation(location *string, layout *string) (*time.Location, error) {
return time.Local, nil
}

// ParseLocalizedGotime behaves like ParseGotime, except that the input value is expected
// to be written in the given language rather than in English. It returns the [time.Time]
// the value represents.
// The value is converted with convertParsingValue, passed through lunes.Translate together
// with the layout and language, and the translated text is handed to ParseGotime.
// The language argument must be a well-formed BCP 47 language tag (e.g.: "en", "en-US"), and
// a known CLDR locale.
func ParseLocalizedGotime(layout string, value any, location *time.Location, language string) (time.Time, error) {
	var zero time.Time

	raw, convErr := convertParsingValue(value)
	if convErr != nil {
		return zero, convErr
	}

	translated, trErr := lunes.Translate(layout, raw, language)
	if trErr != nil {
		return zero, trErr
	}

	return ParseGotime(layout, translated, location)
}

func ParseGotime(layout string, value any, location *time.Location) (time.Time, error) {
timeValue, err := parseGotime(layout, value, location)
if err != nil {
Expand Down Expand Up @@ -155,5 +162,21 @@ func ValidateGotime(layout string) error {
return nil
}

// ValidateLocale reports whether the given locale can be used by the localized parser
// functions. It returns nil when lunes.NewDefaultLocale accepts the language tag.
// If the failure is a *lunes.ErrUnsupportedLocale, a dedicated "unsupported locale" error
// is returned (the underlying error is not wrapped in that case); any other failure is
// wrapped in an "invalid locale" error.
func ValidateLocale(locale string) error {
	if _, err := lunes.NewDefaultLocale(locale); err != nil {
		var unsupported *lunes.ErrUnsupportedLocale
		if errors.As(err, &unsupported) {
			return fmt.Errorf("unsupported locale '%s', value must be a supported BCP 47 language tag", locale)
		}

		return fmt.Errorf("invalid locale '%s': %w", locale, err)
	}

	return nil
}

// Now is initialized to time.Now. It is declared as a package-level variable so that
// tests can override it with a function returning a deterministic value.
var Now = time.Now
70 changes: 70 additions & 0 deletions internal/coreinternal/timeutils/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,73 @@ func TestParseLocalizedStrptimeInvalidType(t *testing.T) {
require.Error(t, err)
require.ErrorContains(t, err, "cannot be parsed as a time")
}

// TestParseLocalizedGotime checks that ParseLocalizedGotime parses values supplied as
// strings or byte slices, in several languages, using native Go time layouts.
func TestParseLocalizedGotime(t *testing.T) {
	type testCase struct {
		name     string
		format   string
		value    any
		language string
		location *time.Location
		expected time.Time
	}

	cases := []testCase{
		{
			name:     "Foreign language",
			format:   "January 02 Monday, 2006, 03:04:05 pm",
			value:    "Febrero 25 jueves, 1993, 02:03:04 p.m.",
			language: "es-ES",
			location: time.Local,
			expected: time.Date(1993, 2, 25, 14, 3, 4, 0, time.Local),
		},
		{
			name:     "Foreign language with location",
			format:   "Monday Jan _2 2006",
			value:    "mercoledì set 4 2024",
			language: "it-IT",
			location: time.UTC,
			expected: time.Date(2024, 9, 4, 0, 0, 0, 0, time.UTC),
		},
		{
			name:     "String value",
			format:   "January 02 Monday, 2006, 03:04:05 PM",
			value:    "March 12 Friday, 2004, 02:03:04 AM",
			language: "en",
			location: time.Local,
			expected: time.Date(2004, 3, 12, 2, 3, 4, 0, time.Local),
		},
		{
			name:     "Bytes value",
			format:   "Jan 02 Mon, 06, 03:04:05 PM",
			value:    []byte("Jun 10 Fri, 04, 02:03:04 AM"),
			language: "en-US",
			location: time.Local,
			expected: time.Date(2004, 6, 10, 2, 3, 4, 0, time.Local),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			parsed, err := ParseLocalizedGotime(tc.format, tc.value, tc.location, tc.language)
			require.NoError(t, err)
			// Compare instants rather than time.Time values.
			assert.Equal(t, tc.expected.UnixNano(), parsed.UnixNano())
		})
	}
}

func TestParseLocalizedGotimeInvalidType(t *testing.T) {
value := time.Now().UnixNano()
_, err := ParseLocalizedStrptime("Mon", value, time.Local, "en")
require.Error(t, err)
require.ErrorContains(t, err, "cannot be parsed as a time")
}

// TestValidateLocale checks that ValidateLocale accepts a language-only tag, a
// language-region tag, and a tag carrying a variant subtag.
func TestValidateLocale(t *testing.T) {
	for _, locale := range []string{"es", "en-US", "ca-ES-valencia"} {
		require.NoError(t, ValidateLocale(locale))
	}
}

func TestValidateLocaleUnsupported(t *testing.T) {
err := ValidateLocale("foo-bar")
require.ErrorContains(t, err, "unsupported locale 'foo-bar'")
}
15 changes: 13 additions & 2 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1455,11 +1455,11 @@ Examples:

### Time

`Time(target, format, Optional[location])`
`Time(target, format, Optional[location], Optional[locale])`

The `Time` Converter takes a string representation of a time and converts it to a Golang `time.Time`.

`target` is a string. `format` is a string, `location` is an optional string.
`target` is a string, `format` is a string, `location` is an optional string, and `locale` is an optional string.

If either `target` or `format` is nil, an error is returned. The parser used is the parser at [internal/coreinternal/timeutils](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/internal/coreinternal/timeutils). If the `target` and `format` do not follow the parsing rules used by this parser, an error is returned.

Expand Down Expand Up @@ -1519,6 +1519,17 @@ Examples:
- `Time("2012-11-01T22:08:41+0000 EST", "%Y-%m-%dT%H:%M:%S%z %Z")`
- `Time("2023-05-26 12:34:56", "%Y-%m-%d %H:%M:%S", "America/New_York")`

`locale` specifies the input language of the `target` value. It is used to interpret timestamp values written in a specific language,
ensuring that the function can correctly parse the localized month names, day names, and periods of the day based on the provided language.

The value must be a well-formed BCP 47 language tag, and a known [CLDR](https://cldr.unicode.org) v45 locale.
If not supplied, English (`en`) is used.

Examples:

- `Time("mercoledì set 4 2024", "%A %h %e %Y", "", "it")`
- `Time("Febrero 25 lunes, 2002, 02:03:04 p.m.", "%B %d %A, %Y, %r", "America/New_York", "es-ES")`

### TraceID

`TraceID(bytes)`
Expand Down
23 changes: 20 additions & 3 deletions pkg/ottl/ottlfuncs/func_time.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-c
import (
"context"
"fmt"
"time"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/timeutils"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
Expand All @@ -15,6 +16,7 @@ type TimeArguments[K any] struct {
Time ottl.StringGetter[K]
Format string
Location ottl.Optional[string]
Locale ottl.Optional[string]
}

func NewTimeFactory[K any]() ottl.Factory[K] {
Expand All @@ -27,10 +29,10 @@ func createTimeFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ot
return nil, fmt.Errorf("TimeFactory args must be of type *TimeArguments[K]")
}

return Time(args.Time, args.Format, args.Location)
return Time(args.Time, args.Format, args.Location, args.Locale)
}

func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Optional[string]) (ottl.ExprFunc[K], error) {
func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Optional[string], locale ottl.Optional[string]) (ottl.ExprFunc[K], error) {
if format == "" {
return nil, fmt.Errorf("format cannot be nil")
}
Expand All @@ -49,6 +51,16 @@ func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Op
if err != nil {
return nil, err
}

var inputTimeLocale *string
if !locale.IsEmpty() {
l := locale.Get()
if err = timeutils.ValidateLocale(l); err != nil {
return nil, err
}
inputTimeLocale = &l
}

return func(ctx context.Context, tCtx K) (any, error) {
t, err := inputTime.Get(ctx, tCtx)
if err != nil {
Expand All @@ -57,7 +69,12 @@ func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Op
if t == "" {
return nil, fmt.Errorf("time cannot be nil")
}
timestamp, err := timeutils.ParseGotime(gotimeFormat, t, loc)
var timestamp time.Time
if inputTimeLocale != nil {
timestamp, err = timeutils.ParseLocalizedGotime(gotimeFormat, t, loc, *inputTimeLocale)
} else {
timestamp, err = timeutils.ParseGotime(gotimeFormat, t, loc)
}
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit d98a4f6

Please sign in to comment.