From 56652cb1b55fd20d33abefc95670248c671c62e5 Mon Sep 17 00:00:00 2001 From: jiangwei1995910 Date: Wed, 10 Jul 2019 21:57:05 +0800 Subject: [PATCH] fix --- lianjia.go | 106 ++++++++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/lianjia.go b/lianjia.go index 3ca0f35..0088185 100644 --- a/lianjia.go +++ b/lianjia.go @@ -48,70 +48,70 @@ func crawlerOneCity(cityUrl string) { if err := c.SetStorage(storage); err != nil { panic(err) } - - c.OnHTML(".position a", func(element *colly.HTMLElement) { - u, err := url.Parse(cityUrl) - if err != nil { - panic(err) - } - rootUrl := u.Scheme + "://" + u.Host - - goUrl := element.Attr("href") - u, err = url.Parse(goUrl) - if err != nil { - fmt.Println(err) - } - if u.Scheme == "" { - goUrl = rootUrl + u.Path - } else { - goUrl = u.String() - } - c.Visit(goUrl) - + c.OnRequest(func(r *colly.Request) { + fmt.Println("列表抓取:", r.URL.String()) }) - // 获取一页的数据 - c.OnHTML(".LOGCLICKDATA", func(e *colly.HTMLElement) { - link := e.ChildAttr("a", "href") + c.OnHTML("body", func(element *colly.HTMLElement) { + // 获取一页的数据 + element.ForEach(".LOGCLICKDATA", func(i int, e *colly.HTMLElement) { + link := e.ChildAttr("a", "href") - title := e.ChildText("a:first-child") - //fmt.Println(title) + title := e.ChildText("a:first-child") + //fmt.Println(title) - price := e.ChildText(".totalPrice") - price = strings.Replace(price, "万", "0000", 1) - //fmt.Println("总价:" + price) - iPrice, err := strconv.Atoi(price) - if err != nil { - iPrice = 0 - } + price := e.ChildText(".totalPrice") + price = strings.Replace(price, "万", "0000", 1) + //fmt.Println("总价:" + price) + iPrice, err := strconv.Atoi(price) + if err != nil { + iPrice = 0 + } - unitPrice := e.ChildAttr(".unitPrice", "data-price") + unitPrice := e.ChildAttr(".unitPrice", "data-price") - //fmt.Println("每平米:" + unitPrice) - //fmt.Println(e.Text) + //fmt.Println("每平米:" + unitPrice) + //fmt.Println(e.Text) - iUnitPrice, err := strconv.Atoi(unitPrice) - if err != nil { - iUnitPrice = 0 - } + iUnitPrice, err := strconv.Atoi(unitPrice) + if err != nil { + iUnitPrice = 0 + } + db.Add(bson.M{"Title": title, "TotalePrice": iPrice, "UnitPrice": iUnitPrice, "Link": link, "listCrawlTime": time.Now()}) - db.Add(bson.M{"Title": title, "TotalePrice": iPrice, "UnitPrice": iUnitPrice, "Link": link, "listCrawlTime": time.Now()}) + }) - }) + // 切换地点 + element.ForEach(".position a", func(i int, element *colly.HTMLElement) { + u, err := url.Parse(cityUrl) + if err != nil { + panic(err) + } + rootUrl := u.Scheme + "://" + u.Host - c.OnHTML(".page-box", func(e *colly.HTMLElement) { - page := Page{} - json.Unmarshal([]byte(e.ChildAttr(".house-lst-page-box", "page-data")), &page) - //fmt.Println(page.TotalPage) - //fmt.Println(page.CurPage) - if page.CurPage < page.TotalPage { - c.Visit(cityUrl + "pg" + strconv.Itoa(page.CurPage+1) + "/") - } + goUrl := element.Attr("href") + u, err = url.Parse(goUrl) + if err != nil { + fmt.Println(err) + } + if u.Scheme == "" { + goUrl = rootUrl + u.Path + } else { + goUrl = u.String() + } + c.Visit(goUrl) + }) + + // 下一页 + element.ForEach(".page-box", func(i int, element *colly.HTMLElement) { + var page Page + json.Unmarshal([]byte(element.ChildAttr(".house-lst-page-box", "page-data")), &page) + if page.CurPage < page.TotalPage { + c.Visit(cityUrl + "pg" + strconv.Itoa(page.CurPage+1) + "/") + } - }) + }) - c.OnRequest(func(r *colly.Request) { - fmt.Println("列表抓取:", r.URL.String()) }) c.Visit(cityUrl) @@ -250,4 +250,4 @@ func main() { <-listFlag <-detailFlag -} \ No newline at end of file +}