Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport: Removal of regex usage on denom validation #570

Merged
merged 11 commits into from
Mar 16, 2024
2 changes: 1 addition & 1 deletion tests/integration/bank/keeper/deterministic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ type DeterministicTestSuite struct {
}

var (
denomRegex = sdk.DefaultCoinDenomRegex()
denomRegex = `[a-zA-Z][a-zA-Z0-9/:._-]{2,127}`
addr1 = sdk.MustAccAddressFromBech32("cosmos139f7kncmglres2nf3h4hc4tade85ekfr8sulz5")
coin1 = sdk.NewCoin("denom", sdk.NewInt(10))
metadataAtom = banktypes.Metadata{
Expand Down
67 changes: 45 additions & 22 deletions types/coin.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"regexp"
"sort"
"strings"
"unicode"
)

//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -835,30 +836,15 @@ func (coins Coins) Sort() Coins {
return coins
}

//-----------------------------------------------------------------------------
// Parsing

var (
// Denominations can be 3 ~ 128 characters long and support letters, followed by either
// a letter, a number or a separator ('/', ':', '.', '_' or '-').
reDnmString = `[a-zA-Z][a-zA-Z0-9/:._-]{2,127}`
reDecAmt = `[[:digit:]]+(?:\.[[:digit:]]+)?|\.[[:digit:]]+`
reSpc = `[[:space:]]*`
reDnm *regexp.Regexp
reDecCoin *regexp.Regexp
)
reDecAmt = `[[:digit:]]+(?:\.[[:digit:]]+)?|\.[[:digit:]]+`
reSpc = `[[:space:]]*`

func init() {
SetCoinDenomRegex(DefaultCoinDenomRegex)
}
coinDenomRegex func() string

// DefaultCoinDenomRegex returns the default regex string
func DefaultCoinDenomRegex() string {
return reDnmString
}

// coinDenomRegex returns the current regex string and can be overwritten for custom validation
var coinDenomRegex = DefaultCoinDenomRegex
reDnm *regexp.Regexp
reDecCoin *regexp.Regexp
)

// SetCoinDenomRegex allows for coin's custom validation by overriding the regular
// expression string used for denom validation.
Expand All @@ -871,12 +857,49 @@ func SetCoinDenomRegex(reFn func() string) {

// ValidateDenom is the default validation function for Coin.Denom.
func ValidateDenom(denom string) error {
if !reDnm.MatchString(denom) {
if reDnm == nil || reDecCoin == nil {
// Convert the string to a byte slice as required by the Ragel-generated function.

// Call the Ragel-generated function.
if !MatchDenom(denom) {
return fmt.Errorf("invalid denom: %s", denom)
}
} else if !reDnm.MatchString(denom) { // If reDnm has been initialized, use it for matching.
return fmt.Errorf("invalid denom: %s", denom)
}

return nil
}

// isValidRune checks if a given rune is a valid character for a rune.
// It returns true if the rune is a letter, digit, '/', ':', '.', '_', or '-'.
func isValidRune(r rune) bool {
return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '/' || r == ':' || r == '.' || r == '_' || r == '-'
Comment on lines +876 to +877
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw we should change this upstream to just be bytes, and a couple of if statements. Rune's / these unicode calls are even more complex than what we need.

e.g. imagine checking [0-9/_] as byte >= 0x2F && byte <= 0x3A.

Copy link
Member

@ValarDragon ValarDragon Mar 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can make an issue for trying this upstream

}

// MatchDenom checks if the given string is a valid denomination.
// A valid denomination must have a length between 3 and 128 characters,
// start with a letter, and only contain valid runes.
func MatchDenom(s string) bool {
length := len(s)
if length < 3 || length > 128 {
return false
}

firstRune := rune(s[0])
if !unicode.IsLetter(firstRune) {
return false
}

for _, r := range s[1:] {
if !isValidRune(r) {
return false
}
}

return true
}

func mustValidateDenom(denom string) {
if err := ValidateDenom(denom); err != nil {
panic(err)
Expand Down
31 changes: 30 additions & 1 deletion types/coin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ func (s *coinTestSuite) TestCoinIsValid() {

func (s *coinTestSuite) TestCustomValidation() {
newDnmRegex := `[\x{1F600}-\x{1F6FF}]`
reDnmString := `[a-zA-Z][a-zA-Z0-9/:._-]{2,127}`

sdk.SetCoinDenomRegex(func() string {
return newDnmRegex
})
Expand All @@ -130,7 +132,7 @@ func (s *coinTestSuite) TestCustomValidation() {
for i, tc := range cases {
s.Require().Equal(tc.expectPass, tc.coin.IsValid(), "unexpected result for IsValid, tc #%d", i)
}
sdk.SetCoinDenomRegex(sdk.DefaultCoinDenomRegex)
sdk.SetCoinDenomRegex(func() string { return reDnmString })
}

func (s *coinTestSuite) TestCoinsDenoms() {
Expand Down Expand Up @@ -998,6 +1000,33 @@ func (s *coinTestSuite) TestParseCoins() {
}
}

func (s *coinTestSuite) TestValidateDenom() {
cases := []struct {
input string
valid bool
}{
{"", false},
{"stake", true},
{"stake,", false},
{"me coin", false},
{"me coin much", false},
{"not a coin", false},
{"foo:bar", true}, // special characters '/' | ':' | '.' | '_' | '-' are allowed
{"atom10", true}, // number in denom is allowed
{"transfer/channelToA/uatom", true},
{"ibc/7F1D3FCF4AE79E1554D670D1AD949A9BA4E4A3C76C63093E17E446A46061A7A2", true},
}

for _, tc := range cases {
err := sdk.ValidateDenom(tc.input)
if !tc.valid {
s.Require().Error(err)
} else {
s.Require().NoError(err)
}
}
}

func (s *coinTestSuite) TestSortCoins() {
good := sdk.Coins{
sdk.NewInt64Coin("gas", 1),
Expand Down
66 changes: 60 additions & 6 deletions types/dec_coin.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"fmt"
"sort"
"strings"
"unicode"

"github.com/pkg/errors"
)
Expand Down Expand Up @@ -621,14 +622,23 @@
// ParseDecCoin parses a decimal coin from a string, returning an error if
// invalid. An empty string is considered invalid.
func ParseDecCoin(coinStr string) (coin DecCoin, err error) {
coinStr = strings.TrimSpace(coinStr)
var amountStr, denomStr string
// if custom parsing has not been set, use default coin regex
if reDecCoin == nil {
amountStr, denomStr, err = ParseDecAmount(coinStr)
if err != nil {
return DecCoin{}, err
}
} else {
coinStr = strings.TrimSpace(coinStr)

matches := reDecCoin.FindStringSubmatch(coinStr)
if matches == nil {
return DecCoin{}, fmt.Errorf("invalid decimal coin expression: %s", coinStr)
}
matches := reDecCoin.FindStringSubmatch(coinStr)
if matches == nil {
return DecCoin{}, fmt.Errorf("invalid decimal coin expression: %s", coinStr)
}

amountStr, denomStr := matches[1], matches[2]
amountStr, denomStr = matches[1], matches[2]
}
mattverse marked this conversation as resolved.
Show resolved Hide resolved

amount, err := NewDecFromStr(amountStr)
if err != nil {
Expand All @@ -642,6 +652,50 @@
return NewDecCoinFromDec(denomStr, amount), nil
}

// ParseDecAmount parses the given string into amount, denomination.
func ParseDecAmount(coinStr string) (string, string, error) {
var amountRune, denomRune []rune

// Indicates the start of denom parsing
seenLetter := false
// Indicates we're currently parsing the amount
parsingAmount := true

for _, r := range strings.TrimSpace(coinStr) {
if parsingAmount {

Check failure on line 665 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 665 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 665 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 665 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 665 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 665 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / golangci-lint

ifElseChain: rewrite if-else to switch statement (gocritic)
if unicode.IsDigit(r) || r == '.' {

Check failure on line 666 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 666 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 666 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 666 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / Analyze

ifElseChain: rewrite if-else to switch statement (gocritic)

Check failure on line 666 in types/dec_coin.go

View workflow job for this annotation

GitHub Actions / golangci-lint

ifElseChain: rewrite if-else to switch statement (gocritic)
amountRune = append(amountRune, r)
} else if unicode.IsSpace(r) { // if space is seen, indicates that we have finished parsing amount
parsingAmount = false
} else if unicode.IsLetter(r) { // if letter is seen, indicates that it is the start of denom
parsingAmount = false
seenLetter = true
denomRune = append(denomRune, r)
} else { // Invalid character encountered in amount part
return "", "", fmt.Errorf("invalid character in coin string: %s", string(r))
}
} else if !seenLetter { // This logic flow is for skipping spaces between amount and denomination
if unicode.IsLetter(r) {
seenLetter = true
denomRune = append(denomRune, r)
} else if !unicode.IsSpace(r) {
// Invalid character before denomination starts
return "", "", fmt.Errorf("invalid start of denomination: %s", string(r))
}
} else {
// Parsing the denomination
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '/' || r == ':' || r == '.' || r == '_' || r == '-' {
denomRune = append(denomRune, r)
} else {
// Invalid character encountered in denomination part
return "", "", fmt.Errorf("invalid character in denomination: %s", string(r))
}
}
}

return string(amountRune), string(denomRune), nil
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The introduction of ParseDecAmount for parsing the amount and denomination from a coin string directly, without relying on regular expressions, is a significant improvement. This change aligns with the PR's objective and enhances the code's readability and maintainability. However, consider refactoring the if-else chain in lines 665-665 and 666-666 into a switch statement for improved clarity and maintainability.

for _, r := range strings.TrimSpace(coinStr) {
-   if parsingAmount {
+   switch {
+   case parsingAmount:
        if unicode.IsDigit(r) || r == '.' {
            amountRune = append(amountRune, r)
        } else if unicode.IsSpace(r) { // if space is seen, indicates that we have finished parsing amount
            parsingAmount = false
        } else if unicode.IsLetter(r) { // if letter is seen, indicates that it is the start of denom
            parsingAmount = false
            seenLetter = true
            denomRune = append(denomRune, r)
        } else { // Invalid character encountered in amount part
            return "", "", fmt.Errorf("invalid character in coin string: %s", string(r))
        }
-   } else if !seenLetter { // This logic flow is for skipping spaces between amount and denomination
+   case !seenLetter: // This logic flow is for skipping spaces between amount and denomination
        if unicode.IsLetter(r) {
            seenLetter = true
            denomRune = append(denomRune, r)
        } else if !unicode.IsSpace(r) {
            // Invalid character before denomination starts
            return "", "", fmt.Errorf("invalid start of denomination: %s", string(r))
        }
-   } else {
+   default:
        // Parsing the denomination
        if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '/' || r == ':' || r == '.' || r == '_' || r == '-' {
            denomRune = append(denomRune, r)
        } else {
            // Invalid character encountered in denomination part
            return "", "", fmt.Errorf("invalid character in denomination: %s", string(r))
        }
    }
}

Additionally, ensure comprehensive testing of ParseDecAmount to cover various edge cases and input scenarios, as this function is critical for parsing coin strings accurately.

Would you like me to help generate test cases for ParseDecAmount to ensure it handles edge cases effectively?


Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
// ParseDecAmount parses the given string into amount, denomination.
func ParseDecAmount(coinStr string) (string, string, error) {
var amountRune, denomRune []rune
// Indicates the start of denom parsing
seenLetter := false
// Indicates we're currently parsing the amount
parsingAmount := true
for _, r := range strings.TrimSpace(coinStr) {
if parsingAmount {
if unicode.IsDigit(r) || r == '.' {
amountRune = append(amountRune, r)
} else if unicode.IsSpace(r) { // if space is seen, indicates that we have finished parsing amount
parsingAmount = false
} else if unicode.IsLetter(r) { // if letter is seen, indicates that it is the start of denom
parsingAmount = false
seenLetter = true
denomRune = append(denomRune, r)
} else { // Invalid character encountered in amount part
return "", "", fmt.Errorf("invalid character in coin string: %s", string(r))
}
} else if !seenLetter { // This logic flow is for skipping spaces between amount and denomination
if unicode.IsLetter(r) {
seenLetter = true
denomRune = append(denomRune, r)
} else if !unicode.IsSpace(r) {
// Invalid character before denomination starts
return "", "", fmt.Errorf("invalid start of denomination: %s", string(r))
}
} else {
// Parsing the denomination
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '/' || r == ':' || r == '.' || r == '_' || r == '-' {
denomRune = append(denomRune, r)
} else {
// Invalid character encountered in denomination part
return "", "", fmt.Errorf("invalid character in denomination: %s", string(r))
}
}
}
return string(amountRune), string(denomRune), nil
}
for _, r := range strings.TrimSpace(coinStr) {
switch {
case parsingAmount:
if unicode.IsDigit(r) || r == '.' {
amountRune = append(amountRune, r)
} else if unicode.IsSpace(r) { // if space is seen, indicates that we have finished parsing amount
parsingAmount = false
} else if unicode.IsLetter(r) { // if letter is seen, indicates that it is the start of denom
parsingAmount = false
seenLetter = true
denomRune = append(denomRune, r)
} else { // Invalid character encountered in amount part
return "", "", fmt.Errorf("invalid character in coin string: %s", string(r))
}
case !seenLetter: // This logic flow is for skipping spaces between amount and denomination
if unicode.IsLetter(r) {
seenLetter = true
denomRune = append(denomRune, r)
} else if !unicode.IsSpace(r) {
// Invalid character before denomination starts
return "", "", fmt.Errorf("invalid start of denomination: %s", string(r))
}
default:
// Parsing the denomination
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '/' || r == ':' || r == '.' || r == '_' || r == '-' {
denomRune = append(denomRune, r)
} else {
// Invalid character encountered in denomination part
return "", "", fmt.Errorf("invalid character in denomination: %s", string(r))
}
}
}


// ParseDecCoins will parse out a list of decimal coins separated by commas. If the parsing is successuful,
// the provided coins will be sanitized by removing zero coins and sorting the coin set. Lastly
// a validation of the coin set is executed. If the check passes, ParseDecCoins will return the sanitized coins.
Expand Down
Loading