Skip to content

Commit

Permalink
Adds option to display date of fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
tomnomnom committed Jun 10, 2018
1 parent 252b937 commit 0f32c1c
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
waybackurls
*.swp
*.tgz
*.zip
*.exe
38 changes: 29 additions & 9 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@ import (
"io/ioutil"
"net/http"
"os"
"time"
)

const fetchURL = "http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
const fetchURL = "http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&collapse=urlkey"

func main() {

var domains []string

var dates bool
flag.BoolVar(&dates, "dates", false, "show date of fetch in the first column")

flag.Parse()

if flag.NArg() > 0 {
Expand All @@ -36,38 +40,54 @@ func main() {

for _, domain := range domains {

urls, err := getWaybackURLs(domain)
wurls, err := getWaybackURLs(domain)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to fetch URLs for [%s]\n", domain)
continue
}

for _, url := range urls {
fmt.Println(url)
for _, w := range wurls {
if dates {

d, err := time.Parse("20060102150405", w.date)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to parse date [%s] for URL [%s]\n", w.date, w.url)
}

fmt.Printf("%s %s\n", d.Format(time.RFC3339), w.url)

} else {
fmt.Println(w.url)
}
}
}

}

func getWaybackURLs(domain string) ([]string, error) {
// wurl is a single record returned by getWaybackURLs: one archived URL
// together with the timestamp string that accompanied it in the response.
type wurl struct {
// date is the raw timestamp string taken from column 1 of a response row.
// main parses it with the layout "20060102150405" when -dates is set.
date string
// url is the value taken from column 2 of a response row.
url string
}

out := make([]string, 0)
func getWaybackURLs(domain string) ([]wurl, error) {

res, err := http.Get(fmt.Sprintf(fetchURL, domain))
if err != nil {
return out, err
return []wurl{}, err
}

raw, err := ioutil.ReadAll(res.Body)

res.Body.Close()
if err != nil {
return out, err
return []wurl{}, err
}

var wrapper [][]string
err = json.Unmarshal(raw, &wrapper)

out := make([]wurl, 0, len(wrapper))

skip := true
for _, urls := range wrapper {
// The first item is always just the string "original",
Expand All @@ -76,7 +96,7 @@ func getWaybackURLs(domain string) ([]string, error) {
skip = false
continue
}
out = append(out, urls...)
out = append(out, wurl{date: urls[1], url: urls[2]})
}

return out, nil
Expand Down

0 comments on commit 0f32c1c

Please sign in to comment.