Skip to content

Commit

Permalink
Implement and test the main crawler
Browse files Browse the repository at this point in the history
The main crawler downloads the CSV data to be parsed from the PhishStats URL.
  • Loading branch information
feed3r committed Mar 9, 2022
1 parent 887628c commit 639894d
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
29 changes: 29 additions & 0 deletions phishstats/ps_crawler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package phishstats

import (
	"fmt"
	"io"
	"net/http"
	"time"
)

// url is the address of the PhishStats CSV feed that ReadData downloads.
const url = "https://phishstats.info/phish_score.csv"

// ReadData downloads the PhishStats CSV feed and returns its body as a string.
//
// The request goes through a client with an explicit timeout: http.Get relies
// on http.DefaultClient, which has none, so a stalled server could otherwise
// block the caller indefinitely.
//
// An error is returned when the request fails, when the server answers with
// anything other than 200 OK, or when the body cannot be read. In all of those
// cases the returned string is empty.
func ReadData() (string, error) {
	// Upper bound for the whole exchange, including reading the body.
	const requestTimeout = 60 * time.Second

	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Get(url)
	if err != nil {
		return "", fmt.Errorf("fetching phishstats feed: %w", err)
	}
	defer resp.Body.Close()

	// Reject non-200 answers before touching the body so an error page is
	// never handed back as CSV data.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected HTTP status: %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading phishstats feed body: %w", err)
	}
	return string(body), nil
}
27 changes: 27 additions & 0 deletions phishstats/ps_crawler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package phishstats

import (
"strings"
"testing"

"github.com/stretchr/testify/require"
)

// phishStatsPrefix is the banner that TestReadData expects at the very start
// of the data returned by ReadData.
const phishStatsPrefix = `######################################################################################
# PhishScore | PhishStats #
# Score ranges: 0-2 likely 2-4 suspicious 4-6 phishing 6-10 omg phishing! #
# Ranges may be adjusted without notice. List updated every 90 minutes. Do not crawl #
# too much at the risk of being blocked. #
# Many Phishing websites are legitimate websites that have been hacked, so keep that #
# in mind before blocking everything. Feed is provided as is, without any warrant. #
# CSV: Date,Score,URL,IP #
######################################################################################`

// TestReadData calls ReadData and checks that the returned data is non-empty
// and begins with the expected PhishStats banner.
//
// NOTE(review): ReadData performs a real HTTP request, so this test needs
// network access to the feed host.
func TestReadData(t *testing.T) {
	// Named err rather than error so the builtin error type is not shadowed.
	res, err := ReadData()
	require.NoError(t, err)
	// A string can never be nil, so assert on emptiness instead.
	require.NotEmpty(t, res)
	require.True(t, strings.HasPrefix(res, phishStatsPrefix))
}

0 comments on commit 639894d

Please sign in to comment.