Skip to content

Commit

Permalink
domain: Add adguard matcher
Browse files Browse the repository at this point in the history
  • Loading branch information
nekohasekai committed Jul 26, 2024
1 parent 7893a74 commit a2f9fef
Show file tree
Hide file tree
Showing 5 changed files with 325 additions and 94 deletions.
67 changes: 67 additions & 0 deletions common/domain/adguard_matcher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package domain_test

import (
"sort"
"testing"

"github.com/sagernet/sing/common/domain"

"github.com/stretchr/testify/require"
)

// TestAdGuardMatcher builds a matcher from a fixed set of AdGuard-style rule
// lines, checks which domain names it accepts and rejects, and finally checks
// that Dump returns the same rule lines that were used to build it
// (compared after sorting both sides).
func TestAdGuardMatcher(t *testing.T) {
	t.Parallel()
	ruleLines := []string{
		"||example.org^",
		"|example.com^",
		"example.net^",
		"||example.edu",
		"||example.edu.tw^",
		"|example.gov",
		"example.arpa",
	}
	matcher := domain.NewAdGuardMatcher(ruleLines)
	require.NotNil(t, matcher)
	matchDomain := []string{
		"example.org",
		"www.example.org",
		"example.com",
		"example.net",
		"isexample.net",
		"www.example.net",
		"example.edu",
		"example.edu.cn",
		"example.edu.tw",
		"www.example.edu",
		"www.example.edu.cn",
		"example.gov",
		"example.gov.cn",
		"example.arpa",
		"www.example.arpa",
		"isexample.arpa",
		"example.arpa.cn",
		"www.example.arpa.cn",
		"isexample.arpa.cn",
	}
	notMatchDomain := []string{
		"example.org.cn",
		"notexample.org",
		"example.com.cn",
		"www.example.com.cn",
		"example.net.cn",
		"notexample.edu",
		"notexample.edu.cn",
		"www.example.gov",
		"notexample.gov",
	}
	// The loop variables are deliberately not called "domain": that name
	// would shadow the imported domain package inside the loop bodies.
	for _, name := range matchDomain {
		require.True(t, matcher.Match(name), name)
	}
	for _, name := range notMatchDomain {
		require.False(t, matcher.Match(name), name)
	}
	// Dump order is not compared directly; sort both sides first.
	dLines := matcher.Dump()
	sort.Strings(ruleLines)
	sort.Strings(dLines)
	require.Equal(t, ruleLines, dLines)
}
172 changes: 172 additions & 0 deletions common/domain/adgurad_matcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
package domain

import (
"bytes"
"sort"
"strings"

"github.com/sagernet/sing/common"
"github.com/sagernet/sing/common/varbin"
)

const (
	// anyLabel is the wildcard byte. When AdGuardMatcher.has meets it as a
	// trie label, it skips the key ahead to its next '.' (or accepts the rest
	// of the key when no '.' remains) and continues in the wildcard's child.
	anyLabel = '*'
	// suffixLabel is appended by NewAdGuardMatcher to every rule that has no
	// trailing '^'. After reversal it is the first byte of the stored key,
	// and Match prepends it to the reversed domain when probing such rules.
	suffixLabel = '\b'
)

// AdGuardMatcher matches domain names against a collection of AdGuard-style
// rule lines. It is created by NewAdGuardMatcher or ReadAdGuardMatcher.
type AdGuardMatcher struct {
	// set holds the encoded (reversed, label-tagged) rules.
	set *succinctSet
}

// NewAdGuardMatcher compiles AdGuard-style rule lines into an AdGuardMatcher.
//
// Each rule line is encoded as follows before being stored:
//   - a leading "||" is replaced by rootLabel;
//   - a leading "|" is removed and nothing is put in its place;
//   - a rule with neither gets prefixLabel put in front;
//   - a trailing "^" is removed; a rule without it loses one trailing "."
//     (if any) and gets suffixLabel appended.
//
// The encoded rules are reversed with reverseDomain, de-duplicated, sorted and
// handed to newSuccinctSet.
func NewAdGuardMatcher(ruleLines []string) *AdGuardMatcher {
	encoded := make([]string, 0, len(ruleLines))
	for _, rule := range ruleLines {
		// Work out what goes in front of the rule from its leading anchor.
		head := string(prefixLabel)
		switch {
		case strings.HasPrefix(rule, "||"):
			rule = rule[2:]
			head = string(rootLabel)
		case strings.HasPrefix(rule, "|"):
			rule = rule[1:]
			head = ""
		}

		// The end anchor has to be detected before the head is attached.
		endAnchored := strings.HasSuffix(rule, "^")
		if endAnchored {
			rule = rule[:len(rule)-1]
		}

		rule = head + rule
		if !endAnchored {
			rule = strings.TrimSuffix(rule, ".") + string(suffixLabel)
		}
		encoded = append(encoded, reverseDomain(rule))
	}
	encoded = common.Uniq(encoded)
	sort.Strings(encoded)
	return &AdGuardMatcher{newSuccinctSet(encoded)}
}

// ReadAdGuardMatcher loads a succinct set from reader with readSuccinctSet and
// wraps it in an AdGuardMatcher. Any error from readSuccinctSet is returned
// unchanged together with a nil matcher.
func ReadAdGuardMatcher(reader varbin.Reader) (*AdGuardMatcher, error) {
	loaded, err := readSuccinctSet(reader)
	if err != nil {
		return nil, err
	}
	return &AdGuardMatcher{set: loaded}, nil
}

// Write writes the matcher's underlying set to writer by calling the set's own
// Write method, and returns whatever error that call reports.
func (m *AdGuardMatcher) Write(writer varbin.Writer) error {
	return m.set.Write(writer)
}

// Match reports whether domain is matched by any rule in the matcher.
//
// The domain is reversed with reverseDomain and looked up as is. If that
// fails, it is looked up again with suffixLabel in front, and then repeatedly
// with everything up to and including the first '.' of the reversed name cut
// off, until a lookup succeeds or no '.' is left.
func (m *AdGuardMatcher) Match(domain string) bool {
	reversed := reverseDomain(domain)
	if m.has([]byte(reversed), 0, 0) {
		return true
	}
	rest := reversed
	for !m.has([]byte(string(suffixLabel)+rest), 0, 0) {
		dot := strings.IndexByte(rest, '.')
		if dot < 0 {
			// Nothing left to cut off and nothing matched.
			return false
		}
		rest = rest[dot+1:]
	}
	return true
}

// has reports whether key is matched by the rules stored in m.set, starting
// the walk at node nodeId whose labels begin at position bmIdx of the label
// bitmap. Match calls it with (0, 0); the anyLabel branch below calls it
// recursively for the node behind a wildcard.
//
// key is a reversed domain as produced by reverseDomain, optionally preceded
// by suffixLabel (see Match).
func (m *AdGuardMatcher) has(key []byte, nodeId, bmIdx int) bool {
	for i := 0; i < len(key); i++ {
		currentChar := key[i]
		// Scan the labels of the current node until one accepts currentChar.
		for ; ; bmIdx++ {
			// A set bit ends the scan without a match; presumably it marks the
			// end of this node's labels (TODO confirm against succinctSet).
			if getBit(m.set.labelBitmap, bmIdx) != 0 {
				return false
			}
			nextLabel := m.set.labels[bmIdx-nodeId]
			// A prefixLabel edge accepts whatever is left of the key.
			if nextLabel == prefixLabel {
				return true
			}
			// A rootLabel edge accepts the key only when the current byte is a
			// '.' and the node behind the edge is flagged in leaves.
			if nextLabel == rootLabel {
				nextNodeId := countZeros(m.set.labelBitmap, m.set.ranks, bmIdx+1)
				hasNext := getBit(m.set.leaves, nextNodeId) != 0
				if currentChar == '.' && hasNext {
					return true
				}
			}
			// Literal match: stop scanning and descend below.
			if nextLabel == currentChar {
				break
			}
			// Wildcard edge: skip the key ahead to its next '.' and try to
			// match the remainder from the node behind the wildcard. On
			// failure the scan of the current node simply continues.
			if nextLabel == anyLabel {
				idx := bytes.IndexRune(key[i:], '.')
				nextNodeId := countZeros(m.set.labelBitmap, m.set.ranks, bmIdx+1)
				if idx == -1 {
					// No '.' left: a leaf behind the wildcard is a match.
					if getBit(m.set.leaves, nextNodeId) != 0 {
						return true
					}
					// Otherwise retry from the current byte in the child node.
					idx = 0
				}
				nextBmIdx := selectIthOne(m.set.labelBitmap, m.set.ranks, m.set.selects, nextNodeId-1) + 1
				if m.has(key[i+idx:], nextNodeId, nextBmIdx) {
					return true
				}
			}
		}
		// Move to the node behind the matched edge and to the start of that
		// node's labels.
		nodeId = countZeros(m.set.labelBitmap, m.set.ranks, bmIdx+1)
		bmIdx = selectIthOne(m.set.labelBitmap, m.set.ranks, m.set.selects, nodeId-1) + 1
	}
	// The whole key is consumed: it matches if the node reached is flagged in
	// leaves...
	if getBit(m.set.leaves, nodeId) != 0 {
		return true
	}
	// ...or if one of that node's edges is a prefixLabel or rootLabel.
	for ; ; bmIdx++ {
		if getBit(m.set.labelBitmap, bmIdx) != 0 {
			return false
		}
		nextLabel := m.set.labels[bmIdx-nodeId]
		if nextLabel == prefixLabel || nextLabel == rootLabel {
			return true
		}
	}
}

// Dump converts every key stored in the set back into an AdGuard-style rule
// line, undoing the encoding applied by NewAdGuardMatcher:
//   - a leading prefixLabel is dropped (the rule had no start anchor);
//   - a leading rootLabel becomes "||";
//   - any other start becomes "|";
//   - a trailing suffixLabel is dropped; its absence becomes a trailing "^".
//
// The returned slice is nil when the set has no keys. Lines come out in the
// order m.set.keys() yields them.
func (m *AdGuardMatcher) Dump() (ruleLines []string) {
	for _, key := range m.set.keys() {
		key = reverseDomain(key)
		var start, end string
		// HasPrefix/HasSuffix instead of key[0] / key[len(key)-1]: a key may
		// be empty here (rule "|^"), or become empty once the leading label is
		// stripped (rule "^"), and indexing it would panic.
		switch {
		case strings.HasPrefix(key, string(prefixLabel)):
			key = key[1:]
		case strings.HasPrefix(key, string(rootLabel)):
			key = key[1:]
			start = "||"
		default:
			start = "|"
		}
		if strings.HasSuffix(key, string(suffixLabel)) {
			key = key[:len(key)-1]
		} else {
			end = "^"
		}
		ruleLines = append(ruleLines, start+key+end)
	}
	return ruleLines
}
102 changes: 52 additions & 50 deletions common/domain/matcher.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package domain

import (
"encoding/binary"
"sort"
"unicode/utf8"

"github.com/sagernet/sing/common/varbin"
)

const (
	// prefixLabel is put in front of a domain-suffix entry that starts with
	// '.' before it is reversed (see NewMatcher). During lookup, meeting it
	// as a trie label is an immediate match.
	prefixLabel = '\r'
	// rootLabel is put in front of a domain-suffix entry that does not start
	// with '.' (non-legacy mode in NewMatcher). During lookup it matches when
	// the current key byte is '.' and the node behind it is a leaf, or when
	// the key has been fully consumed.
	rootLabel = '\n'
)

// Matcher matches domain names against the domains and domain suffixes it was
// built from. It is created by NewMatcher or ReadMatcher.
type Matcher struct {
	// set holds the reversed, label-tagged entries.
	set *succinctSet
}
Expand All @@ -21,16 +25,16 @@ func NewMatcher(domains []string, domainSuffix []string, generateLegacy bool) *M
}
seen[domain] = true
if domain[0] == '.' {
domainList = append(domainList, reverseDomainSuffix(domain))
domainList = append(domainList, reverseDomain(string(prefixLabel)+domain))
} else if generateLegacy {
domainList = append(domainList, reverseDomain(domain))
suffixDomain := "." + domain
if !seen[suffixDomain] {
seen[suffixDomain] = true
domainList = append(domainList, reverseDomainSuffix(suffixDomain))
domainList = append(domainList, reverseDomain(string(prefixLabel)+suffixDomain))
}
} else {
domainList = append(domainList, reverseDomainRoot(domain))
domainList = append(domainList, reverseDomain(string(rootLabel)+domain))
}
}
for _, domain := range domains {
Expand All @@ -44,38 +48,60 @@ func NewMatcher(domains []string, domainSuffix []string, generateLegacy bool) *M
return &Matcher{newSuccinctSet(domainList)}
}

// matcherData is the value read by ReadMatcher (via varbin.ReadValue) and
// written by Matcher.Write (via varbin.Write), both using big-endian order.
type matcherData struct {
	// Version is written as 1 by Matcher.Write.
	Version uint8
	// Leaves, LabelBitmap and Labels carry the succinctSet fields of the same
	// (lower-case) names.
	Leaves      []uint64
	LabelBitmap []uint64
	Labels      []byte
}

func ReadMatcher(reader varbin.Reader) (*Matcher, error) {
matcher, err := varbin.ReadValue[matcherData](reader, binary.BigEndian)
set, err := readSuccinctSet(reader)
if err != nil {
return nil, err
}
set := &succinctSet{
leaves: matcher.Leaves,
labelBitmap: matcher.LabelBitmap,
labels: matcher.Labels,
}
set.init()
return &Matcher{set}, nil
}

// Write writes the matcher's underlying set to writer by calling the set's own
// Write method, and returns whatever error that call reports.
func (m *Matcher) Write(writer varbin.Writer) error {
	return m.set.Write(writer)
}

func (m *Matcher) Match(domain string) bool {
return m.set.Has(reverseDomain(domain))
return m.has(reverseDomain(domain))
}

func (m *Matcher) Write(writer varbin.Writer) error {
return varbin.Write(writer, binary.BigEndian, matcherData{
Version: 1,
Leaves: m.set.leaves,
LabelBitmap: m.set.labelBitmap,
Labels: m.set.labels,
})
// has reports whether key is matched by the entries stored in m.set. key is a
// reversed domain as produced by reverseDomain (see Match). The walk starts at
// node 0, bitmap position 0.
func (m *Matcher) has(key string) bool {
	var nodeId, bmIdx int
	for i := 0; i < len(key); i++ {
		currentChar := key[i]
		// Scan the labels of the current node until one accepts currentChar.
		for ; ; bmIdx++ {
			// A set bit ends the scan without a match; presumably it marks the
			// end of this node's labels (TODO confirm against succinctSet).
			if getBit(m.set.labelBitmap, bmIdx) != 0 {
				return false
			}
			nextLabel := m.set.labels[bmIdx-nodeId]
			// A prefixLabel edge accepts whatever is left of the key.
			if nextLabel == prefixLabel {
				return true
			}
			// A rootLabel edge accepts the key only when the current byte is a
			// '.' and the node behind the edge is flagged in leaves.
			if nextLabel == rootLabel {
				nextNodeId := countZeros(m.set.labelBitmap, m.set.ranks, bmIdx+1)
				hasNext := getBit(m.set.leaves, nextNodeId) != 0
				if currentChar == '.' && hasNext {
					return true
				}
			}
			// Literal match: stop scanning and descend below.
			if nextLabel == currentChar {
				break
			}
		}
		// Move to the node behind the matched edge and to the start of that
		// node's labels.
		nodeId = countZeros(m.set.labelBitmap, m.set.ranks, bmIdx+1)
		bmIdx = selectIthOne(m.set.labelBitmap, m.set.ranks, m.set.selects, nodeId-1) + 1
	}
	// The whole key is consumed: it matches if the node reached is flagged in
	// leaves...
	if getBit(m.set.leaves, nodeId) != 0 {
		return true
	}
	// ...or if one of that node's edges is a prefixLabel or rootLabel.
	for ; ; bmIdx++ {
		if getBit(m.set.labelBitmap, bmIdx) != 0 {
			return false
		}
		nextLabel := m.set.labels[bmIdx-nodeId]
		if nextLabel == prefixLabel || nextLabel == rootLabel {
			return true
		}
	}
}

func (m *Matcher) Dump() (domainList []string, prefixList []string) {
Expand Down Expand Up @@ -119,27 +145,3 @@ func reverseDomain(domain string) string {
}
return string(b)
}

// reverseDomainSuffix returns domain with the order of its UTF-8 sequences
// reversed and prefixLabel appended as the final byte.
//
// The bytes of each sequence are copied verbatim rather than re-encoded from
// the decoded rune. For valid UTF-8 the result is the same, but a byte that is
// not valid UTF-8 decodes to utf8.RuneError with width 1, and re-encoding that
// rune needs 3 bytes: it would overwrite neighbouring output or index past the
// buffer. Copying keeps such bytes unchanged and never writes out of range.
func reverseDomainSuffix(domain string) string {
	l := len(domain)
	b := make([]byte, l+1)
	for i := 0; i < l; {
		// Only the width is needed; it is always between 1 and 4.
		_, n := utf8.DecodeRuneInString(domain[i:])
		// The sequence at [i, i+n) lands at the mirrored position.
		copy(b[l-i-n:], domain[i:i+n])
		i += n
	}
	b[l] = prefixLabel
	return string(b)
}

// reverseDomainRoot returns domain with the order of its UTF-8 sequences
// reversed and rootLabel appended as the final byte.
//
// The bytes of each sequence are copied verbatim rather than re-encoded from
// the decoded rune. For valid UTF-8 the result is the same, but a byte that is
// not valid UTF-8 decodes to utf8.RuneError with width 1, and re-encoding that
// rune needs 3 bytes: it would overwrite neighbouring output or index past the
// buffer. Copying keeps such bytes unchanged and never writes out of range.
func reverseDomainRoot(domain string) string {
	l := len(domain)
	b := make([]byte, l+1)
	for i := 0; i < l; {
		// Only the width is needed; it is always between 1 and 4.
		_, n := utf8.DecodeRuneInString(domain[i:])
		// The sequence at [i, i+n) lands at the mirrored position.
		copy(b[l-i-n:], domain[i:i+n])
		i += n
	}
	b[l] = rootLabel
	return string(b)
}
Loading

0 comments on commit a2f9fef

Please sign in to comment.