diff --git "a/Docs/\346\225\231\345\255\246\350\247\206\351\242\221\345\275\225\345\210\266.md" "b/Docs/\346\225\231\345\255\246\350\247\206\351\242\221\345\275\225\345\210\266.md" new file mode 100644 index 0000000..aa21f78 --- /dev/null +++ "b/Docs/\346\225\231\345\255\246\350\247\206\351\242\221\345\275\225\345\210\266.md" @@ -0,0 +1,15 @@ + +1. 软件发展史 +2. 设计理念 +3. 相关资料 + +4. 示例工程和调试 + +采集器的功能介绍 +- 全自动模式 +- 手动模式+检索 + + +5. 链家抓取(单页面/手气不错) + +6. 链家多页面(请求) diff --git a/Hawk.Core/Connectors/FileConnectorExcel.cs b/Hawk.Core/Connectors/FileConnectorExcel.cs index 376e0cb..8bd29c7 100644 --- a/Hawk.Core/Connectors/FileConnectorExcel.cs +++ b/Hawk.Core/Connectors/FileConnectorExcel.cs @@ -76,7 +76,7 @@ public override IEnumerable ReadFile(Action alreadyGetSize = public override IEnumerable WriteData(IEnumerable datas) { var xssfWb = new XSSFWorkbook(); - var wb = new SXSSFWorkbook(xssfWb, 100); + var wb = new SXSSFWorkbook(xssfWb, 1000); // IWorkbook workbook = new XSSFWorkbook(); var sheet1 = xssfWb.CreateSheet("Sheet1"); var sw = File.Create(FileName); diff --git a/Hawk.ETL/Process/SmartCrawler.cs b/Hawk.ETL/Process/SmartCrawler.cs index c2c069e..a120bcb 100644 --- a/Hawk.ETL/Process/SmartCrawler.cs +++ b/Hawk.ETL/Process/SmartCrawler.cs @@ -401,8 +401,11 @@ private async void GetXPathAsync() htmlTextBox.SelectionStart = node.StreamPosition; htmlTextBox.SelectionLength = node.OuterHtml.Length; var line = htmlTextBox.GetLineIndexFromCharacterIndex(node.StreamPosition); //返回指定字符串索引所在的行号 - //Debug.WriteLine(rows + ",," + line); - htmlTextBox.ScrollToLine(line + 1); //滚动到视图中指定行索引 + if (line > 0) + { + htmlTextBox.ScrollToLine(line + 1); //滚动到视图中指定行索引 + } + });