Skip to content

Commit

Permalink
Merge pull request #17 from rzhade3/rzhade3/refactor-matching
Browse files Browse the repository at this point in the history
Output Shortlinks and refactor searching code
  • Loading branch information
utkusen authored Sep 24, 2022
2 parents b988e80 + d2afdc3 commit a9a3d96
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 63 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ You can download the pre-built binaries from the [releases](https://github.com/u

urlhunter requires 3 parameters to run: `--keywords`, `--date` and `--o`.

For example: `urlhunter -keywords keywords.txt -date 2020-11-20 -o out.txt`
For example: `urlhunter --keywords keywords.txt --date 2020-11-20 --o out.txt`

### --keywords

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ go 1.15

require (
github.com/fatih/color v1.10.0
github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3
github.com/schollz/progressbar/v3 v3.7.1
)
6 changes: 5 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
Expand All @@ -11,11 +12,14 @@ github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/Qd
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/schollz/progressbar v1.0.0 h1:gbyFReLHDkZo8mxy/dLWMr+Mpb1MokGJ1FqCiqacjZM=
github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3 h1:2YkbhM98YoshI0K0BD95IoCFx+KNN1L/G0P5WzY2kac=
github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3/go.mod h1:iTcJ+0KrnJXKBZvYH/Q6GKLhFuiXzD3z2PRae7xWqpY=
github.com/schollz/progressbar/v3 v3.7.1 h1:aQR/t6d+1nURSdoMn6c7n0vJi5xQ3KndpF0n7R5wrik=
github.com/schollz/progressbar/v3 v3.7.1/go.mod h1:CG/f0JmacksUc6TkZToO7tVq4t03zIQSQUtTd7F9GR4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9 h1:umElSU9WZirRdgu2yFHY0ayQkEnKiOC1TtM3fWXFnoU=
Expand Down
123 changes: 62 additions & 61 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"time"

"github.com/fatih/color"
"github.com/rzhade3/beaconspec"
"github.com/schollz/progressbar/v3"
)

Expand Down Expand Up @@ -62,7 +63,6 @@ var err error
var archivesPath string

func main() {

var keywordFile string
var dateParam string
var outFile string
Expand All @@ -80,7 +80,6 @@ func main() {
flag.Usage = func() { fmt.Print(usage) }
flag.Parse()


if keywordFile == "" || dateParam == "" || outFile == "" {
crash("You must specify all arguments.", err)
return
Expand Down Expand Up @@ -191,7 +190,7 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {
_, err := Unzip(filepath.Join(archivesPath, fullname, item.Name), filepath.Join(archivesPath, fullname))
if err != nil {
os.Remove(filepath.Join(archivesPath, fullname, item.Name))
crash(item.Name + " looks damaged. It's removed now. Run the program again to re-download.", err)
crash(item.Name+" looks damaged. It's removed now. Run the program again to re-download.", err)
}
}

Expand Down Expand Up @@ -227,10 +226,9 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {
}

func searchFile(fileLocation string, keyword string, outfile string) {

var path string

if strings.HasPrefix(fileLocation, "archives"){
if strings.HasPrefix(fileLocation, "archives") {
path_parts := strings.Split(fileLocation, string(os.PathSeparator))
path = filepath.Join(path_parts[1], path_parts[2])
} else {
Expand All @@ -250,54 +248,71 @@ func searchFile(fileLocation string, keyword string, outfile string) {
panic(err)
}
defer f.Close()

metadata, err := beaconspec.ReadMetadata(fileLocation)
if err != nil {
warning(err.Error())
return
}

var matcher func([]byte) bool
if strings.HasPrefix(keyword, "regex") {
regexValue := strings.Split(keyword, " ")[1]
r, err := regexp.Compile(regexValue)
if err != nil {
warning("Invalid Regex!")
return
}
for scanner.Scan() {
if r.MatchString(scanner.Text()) {
textToWrite := strings.Split(scanner.Text(), "|")[1]
if _, err := f.WriteString(textToWrite + "\n"); err != nil {
panic(err)
}
}
}
matcher, err = regexMatch(keyword)
} else if strings.Contains(keyword, ",") {
matcher, err = multiKeywordMatcher(keyword)
} else {
if strings.Contains(keyword, ",") {
keywords := strings.Split(keyword, ",")
for scanner.Scan() {
foundFlag := true
for i := 0; i < len(keywords); i++ {
if bytes.Contains(scanner.Bytes(), []byte(keywords[i])) {
continue
} else {
foundFlag = false
}
}
if foundFlag {
textToWrite := strings.Split(scanner.Text(), "|")[1]
if _, err := f.WriteString(textToWrite + "\n"); err != nil {
panic(err)
}
}
}
matcher, err = stringMatch(keyword)
}
if err != nil {
warning(err.Error())
return
}

for scanner.Scan() {
if matcher(scanner.Bytes()) {

} else {
toFind := []byte(keyword)
for scanner.Scan() {
if bytes.Contains(scanner.Bytes(), toFind) {
textToWrite := strings.Split(scanner.Text(), "|")[1]
if _, err := f.WriteString(textToWrite + "\n"); err != nil {
panic(err)
}
}
line, err := beaconspec.ParseLine(scanner.Text(), metadata)
if err != nil {
panic(err)
}
textToWrite := fmt.Sprintf("%s,%s\n", line.Source, line.Target)
if _, err := f.WriteString(textToWrite); err != nil {
panic(err)
}
}
}
}

// regexMatch builds a line matcher from a keyword of the form "regex <pattern>".
//
// The pattern is the second space-separated field of keyword; any further
// fields are ignored. The returned function reports whether the pattern
// matches anywhere in the given line.
//
// A nil matcher and a non-nil error are returned when keyword carries no
// pattern field or when the pattern does not compile, so callers must check
// the error before using the matcher.
func regexMatch(keyword string) (func([]byte) bool, error) {
	fields := strings.Split(keyword, " ")
	// A keyword without a space (e.g. "regex" on its own) has no pattern
	// field; report it instead of indexing out of range.
	if len(fields) < 2 {
		return nil, fmt.Errorf("regex keyword %q has no pattern", keyword)
	}

	r, err := regexp.Compile(fields[1])
	if err != nil {
		return nil, err
	}

	// Match operates on the bytes directly, avoiding a string copy per line.
	return r.Match, nil
}

// multiKeywordMatcher builds a matcher from a comma-separated keyword list.
// The returned function reports true only when every keyword in the list
// occurs somewhere in the given line. The error result is always nil.
func multiKeywordMatcher(keyword string) (func([]byte) bool, error) {
	var needles [][]byte
	for _, part := range strings.Split(keyword, ",") {
		needles = append(needles, []byte(part))
	}

	matchAll := func(line []byte) bool {
		for i := range needles {
			if bytes.Contains(line, needles[i]) {
				continue
			}
			// One missing keyword is enough to reject the line.
			return false
		}
		return true
	}
	return matchAll, nil
}

// stringMatch builds a matcher that reports whether keyword occurs as a
// substring of the given line. The error result is always nil.
func stringMatch(keyword string) (func([]byte) bool, error) {
	needle := []byte(keyword)
	contains := func(line []byte) bool {
		return bytes.Index(line, needle) >= 0
	}
	return contains, nil
}

func ifArchiveExists(fullname string) bool {
Expand Down Expand Up @@ -367,20 +382,6 @@ func downloadFile(url string) {
color.Green("Download Finished!")
}

// ByteCountSI formats a byte count using decimal (power-of-1000) units.
// Values below 1000 are printed as whole bytes ("999 B"); larger values are
// scaled to one decimal place with a k, M, G, T, P or E prefix ("1.5 MB").
func ByteCountSI(b int64) string {
	const unit = 1000
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	const prefixes = "kMGTPE"
	divisor := int64(unit)
	idx := 0
	// Each further factor of 1000 in b moves on to the next prefix.
	for rest := b / unit; rest >= unit; rest /= unit {
		divisor *= unit
		idx++
	}

	scaled := float64(b) / float64(divisor)
	return fmt.Sprintf("%.1f %cB", scaled, prefixes[idx])
}

func Unzip(src string, dest string) ([]string, error) {
var filenames []string
r, err := zip.OpenReader(src)
Expand Down Expand Up @@ -447,4 +448,4 @@ func crash(message string, err error) {

// warning prints message through color.Yellow, prefixed with "[WARNING]: "
// and terminated by a newline.
func warning(message string) {
	const prefix = "[WARNING]: "
	text := prefix + message + "\n"
	color.Yellow(text)
}
}

0 comments on commit a9a3d96

Please sign in to comment.