forked from dotnetcore/DotnetSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Lewis Zou
committed
Feb 15, 2017
1 parent
78a3630
commit 979555f
Showing
6 changed files
with
218 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
178 changes: 178 additions & 0 deletions
178
src/DotnetSpider2.Extension.Test/TargetUrlSelectorTest.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
using DotnetSpider.Core; | ||
using DotnetSpider.Extension.Model; | ||
using DotnetSpider.Extension.Model.Attribute; | ||
using DotnetSpider.Extension.Processor; | ||
using System; | ||
using System.Reflection; | ||
using Xunit; | ||
|
||
namespace DotnetSpider.Extension.Test
{
    /// <summary>
    /// Tests how <c>TargetUrlsSelector</c> attributes declared on an entity are exposed
    /// through <c>EntityProcessor.GetTargetUrlPatterns</c>, which is queried by region XPath.
    /// </summary>
    public class TargetUrlSelectorTest
    {
        private const string Region1111 = "//*[@id=\"1111\"]";
        private const string Region2222 = "//*[@id=\"2222\"]";

        // A region that no entity in this file declares; lookups for it are expected to return null.
        private const string Region3333 = "//*[@id=\"3333\"]";

        private const string PagePattern09 = @"&page=[0-9]+&";
        private const string PagePattern01 = @"&page=[0-1]+&";

        /// <summary>One attribute: one region, one pattern.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        public class Entity14 : ISpiderEntity
        {
        }

        /// <summary>One attribute: two regions sharing one pattern.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]", "//*[@id=\"2222\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        public class Entity16 : ISpiderEntity
        {
        }

        /// <summary>One attribute: one region, two patterns.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&", @"&page=[0-1]+&" })]
        public class Entity17 : ISpiderEntity
        {
        }

        /// <summary>One attribute with neither XPaths nor Patterns set.</summary>
        [TargetUrlsSelector()]
        public class Entity15 : ISpiderEntity
        {
        }

        /// <summary>One attribute: two regions, two patterns.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]", "//*[@id=\"2222\"]" }, Patterns = new[] { @"&page=[0-9]+&", @"&page=[0-1]+&" })]
        public class Entity18 : ISpiderEntity
        {
        }

        /// <summary>Two attributes: different regions, different patterns.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"2222\"]" }, Patterns = new[] { @"&page=[0-1]+&" })]
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        public class Entity19 : ISpiderEntity
        {
        }

        /// <summary>Two attributes: same region, different patterns.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-1]+&" })]
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        public class Entity20 : ISpiderEntity
        {
        }

        /// <summary>Two attributes: same region, same pattern (exact duplicates).</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        public class Entity21 : ISpiderEntity
        {
        }

        /// <summary>Two attributes: different regions, same pattern.</summary>
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"2222\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        [TargetUrlsSelector(XPaths = new[] { "//*[@id=\"1111\"]" }, Patterns = new[] { @"&page=[0-9]+&" })]
        public class Entity22 : ISpiderEntity
        {
        }

        /// <summary>
        /// Parses the metadata of <typeparamref name="TEntity"/> and builds an
        /// <c>EntityProcessor</c> for it against a default <c>Site</c>.
        /// </summary>
        private static EntityProcessor CreateProcessor<TEntity>() where TEntity : ISpiderEntity
        {
            var metadata = EntitySpider.ParseEntityMetaData(typeof(TEntity).GetTypeInfo());
            return new EntityProcessor(new Site(), metadata);
        }

        /// <summary>
        /// Asserts that the patterns registered for <paramref name="regionXPath"/> are exactly
        /// <paramref name="expectedPatterns"/>, in the same order.
        /// </summary>
        private static void AssertPatterns(EntityProcessor processor, string regionXPath, params string[] expectedPatterns)
        {
            // Look the region up once instead of once per assertion.
            var patterns = processor.GetTargetUrlPatterns(regionXPath);

            Assert.NotNull(patterns);
            Assert.Equal(expectedPatterns.Length, patterns.Count);
            for (int i = 0; i < expectedPatterns.Length; i++)
            {
                Assert.Equal(expectedPatterns[i], patterns[i].ToString());
            }
        }

        [Fact]
        public void TargetUrlsSelector_1Region_1Pattern()
        {
            var processor = CreateProcessor<Entity14>();

            AssertPatterns(processor, Region1111, PagePattern09);
            Assert.Null(processor.GetTargetUrlPatterns("//*[@id=\"222\"]"));
        }

        [Fact]
        public void TargetUrlsSelector_2Region_1Pattern()
        {
            var processor = CreateProcessor<Entity16>();

            AssertPatterns(processor, Region1111, PagePattern09);
            AssertPatterns(processor, Region2222, PagePattern09);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }

        [Fact]
        public void TargetUrlsSelector_1Region_2Pattern()
        {
            var processor = CreateProcessor<Entity17>();

            AssertPatterns(processor, Region1111, PagePattern09, PagePattern01);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }

        /// <summary>
        /// An attribute with neither XPaths nor Patterns must be rejected with a descriptive error.
        /// </summary>
        [Fact]
        public void TargetUrlsSelector_Null()
        {
            // ThrowsAny fails the test when nothing is thrown; the previous try/catch
            // version passed silently in that case. Any exception type is accepted,
            // matching the original catch (Exception).
            var exception = Assert.ThrowsAny<Exception>(() =>
            {
                CreateProcessor<Entity15>();
            });

            Assert.Equal("Region xpath and patterns should not be null both.", exception.Message);
        }

        [Fact]
        public void TargetUrlsSelector_2Region_2Pattern()
        {
            var processor = CreateProcessor<Entity18>();

            // Every region of the attribute is expected to carry every pattern of that attribute.
            AssertPatterns(processor, Region1111, PagePattern09, PagePattern01);
            AssertPatterns(processor, Region2222, PagePattern09, PagePattern01);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }

        [Fact]
        public void TargetUrlsSelector_Multi_2Region_2Pattern()
        {
            var processor = CreateProcessor<Entity19>();

            // Patterns from separate attributes must stay with their own region.
            AssertPatterns(processor, Region1111, PagePattern09);
            AssertPatterns(processor, Region2222, PagePattern01);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }

        [Fact]
        public void TargetUrlsSelector_Multi_2SameRegion_2Pattern()
        {
            var processor = CreateProcessor<Entity20>();

            // Two attributes on the same region are expected to merge, [0-1] pattern first.
            AssertPatterns(processor, Region1111, PagePattern01, PagePattern09);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }

        [Fact]
        public void TargetUrlsSelector_Multi_2SameRegion_2SamePattern()
        {
            var processor = CreateProcessor<Entity21>();

            // Identical region/pattern pairs are expected to collapse into a single entry.
            AssertPatterns(processor, Region1111, PagePattern09);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }

        [Fact]
        public void TargetUrlsSelector_Multi_2Region_2SamePattern()
        {
            var processor = CreateProcessor<Entity22>();

            AssertPatterns(processor, Region1111, PagePattern09);
            AssertPatterns(processor, Region2222, PagePattern09);
            Assert.Null(processor.GetTargetUrlPatterns(Region3333));
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters