Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
jiangwei1995910 committed Jul 10, 2019
1 parent: b1751e7 · commit: 56652cb
Showing 1 changed file with 53 additions and 53 deletions.
106 changes: 53 additions & 53 deletions lianjia.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,70 +48,70 @@ func crawlerOneCity(cityUrl string) {
if err := c.SetStorage(storage); err != nil {
panic(err)
}

c.OnHTML(".position a", func(element *colly.HTMLElement) {
u, err := url.Parse(cityUrl)
if err != nil {
panic(err)
}
rootUrl := u.Scheme + "://" + u.Host

goUrl := element.Attr("href")
u, err = url.Parse(goUrl)
if err != nil {
fmt.Println(err)
}
if u.Scheme == "" {
goUrl = rootUrl + u.Path
} else {
goUrl = u.String()
}
c.Visit(goUrl)

c.OnRequest(func(r *colly.Request) {
fmt.Println("列表抓取:", r.URL.String())
})

// 获取一页的数据
c.OnHTML(".LOGCLICKDATA", func(e *colly.HTMLElement) {
link := e.ChildAttr("a", "href")
c.OnHTML("body", func(element *colly.HTMLElement) {
// 获取一页的数据
element.ForEach(".LOGCLICKDATA", func(i int, e *colly.HTMLElement) {
link := e.ChildAttr("a", "href")

title := e.ChildText("a:first-child")
//fmt.Println(title)
title := e.ChildText("a:first-child")
//fmt.Println(title)

price := e.ChildText(".totalPrice")
price = strings.Replace(price, "万", "0000", 1)
//fmt.Println("总价:" + price)
iPrice, err := strconv.Atoi(price)
if err != nil {
iPrice = 0
}
price := e.ChildText(".totalPrice")
price = strings.Replace(price, "万", "0000", 1)
//fmt.Println("总价:" + price)
iPrice, err := strconv.Atoi(price)
if err != nil {
iPrice = 0
}

unitPrice := e.ChildAttr(".unitPrice", "data-price")
unitPrice := e.ChildAttr(".unitPrice", "data-price")

//fmt.Println("每平米:" + unitPrice)
//fmt.Println(e.Text)
//fmt.Println("每平米:" + unitPrice)
//fmt.Println(e.Text)

iUnitPrice, err := strconv.Atoi(unitPrice)
if err != nil {
iUnitPrice = 0
}
iUnitPrice, err := strconv.Atoi(unitPrice)
if err != nil {
iUnitPrice = 0
}
db.Add(bson.M{"Title": title, "TotalePrice": iPrice, "UnitPrice": iUnitPrice, "Link": link, "listCrawlTime": time.Now()})

db.Add(bson.M{"Title": title, "TotalePrice": iPrice, "UnitPrice": iUnitPrice, "Link": link, "listCrawlTime": time.Now()})
})

})
// 切换地点
element.ForEach(".position a", func(i int, element *colly.HTMLElement) {
u, err := url.Parse(cityUrl)
if err != nil {
panic(err)
}
rootUrl := u.Scheme + "://" + u.Host

c.OnHTML(".page-box", func(e *colly.HTMLElement) {
page := Page{}
json.Unmarshal([]byte(e.ChildAttr(".house-lst-page-box", "page-data")), &page)
//fmt.Println(page.TotalPage)
//fmt.Println(page.CurPage)
if page.CurPage < page.TotalPage {
c.Visit(cityUrl + "pg" + strconv.Itoa(page.CurPage+1) + "/")
}
goUrl := element.Attr("href")
u, err = url.Parse(goUrl)
if err != nil {
fmt.Println(err)
}
if u.Scheme == "" {
goUrl = rootUrl + u.Path
} else {
goUrl = u.String()
}
c.Visit(goUrl)
})

// 下一页
element.ForEach(".page-box", func(i int, element *colly.HTMLElement) {
var page Page
json.Unmarshal([]byte(element.ChildAttr(".house-lst-page-box", "page-data")), &page)
if page.CurPage < page.TotalPage {
c.Visit(cityUrl + "pg" + strconv.Itoa(page.CurPage+1) + "/")
}

})
})

c.OnRequest(func(r *colly.Request) {
fmt.Println("列表抓取:", r.URL.String())
})

c.Visit(cityUrl)
Expand Down Expand Up @@ -250,4 +250,4 @@ func main() {

<-listFlag
<-detailFlag
}
}

0 comments on commit 56652cb

Please sign in to comment.