-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurl_finder.cpp
57 lines (46 loc) · 1.26 KB
/
url_finder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <iostream>
#include <vector>
#include <algorithm>
#include <cctype>
#include <string>
using namespace std;
// Reports whether c is a character that may NOT appear in a URL.
//
// Letters, digits and the punctuation listed in url_ch are treated as URL
// characters; for those the function returns false. Every other character
// yields true.
bool not_url_char(char c){
    // Punctuation accepted inside a URL in addition to alphanumerics.
    static const std::string url_ch = "~;/?:@=&$-_.+!*'(),";

    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF). Plain char may be negative, so convert before calling isalnum
    // to avoid undefined behaviour on bytes >= 0x80.
    const unsigned char uc = static_cast<unsigned char>(c);
    if(std::isalnum(uc))
        return false;

    return url_ch.find(c) == std::string::npos;
}
// Returns an iterator to the first character in [b, e) for which
// not_url_char() is true, i.e. one past the end of the URL that starts at b.
// Returns e when every character in the range is a URL character.
std::string::const_iterator url_end(std::string::const_iterator b, std::string::const_iterator e){
    std::string::const_iterator pos = b;
    // Advance over URL characters until a non-URL character or the end of the range.
    while(pos != e && !not_url_char(*pos))
        ++pos;
    return pos;
}
// Locates the start of the first URL in [b, e).
//
// A URL is recognised as one or more alphabetic characters (the protocol
// name), immediately followed by "://", immediately followed by at least one
// character that not_url_char() accepts as a URL character.
//
// Returns an iterator to the first letter of the protocol name, or e when the
// range contains no such URL.
std::string::const_iterator url_beg(std::string::const_iterator b, std::string::const_iterator e){
    static const std::string sep = "://";
    typedef std::string::const_iterator iter;

    iter i = b;
    while((i = std::search(i, e, sep.begin(), sep.end())) != e){
        // The separator needs text on both sides: ignore a match that sits at
        // the very start of the range or that runs up to its end.
        if(i != b && i + sep.size() != e){
            // Walk backwards from the separator over the protocol-name letters.
            iter beg = i;
            // Convert to unsigned char before calling isalpha: passing a
            // negative plain char to a <cctype> classifier is undefined behaviour.
            while(beg != b && std::isalpha(static_cast<unsigned char>(beg[-1])))
                --beg;

            // Accept only when there is at least one protocol letter before
            // the separator and a URL character right after it.
            if(beg != i && !not_url_char(i[sep.size()]))
                return beg;
        }
        // This separator is not part of a URL; continue searching after it.
        i += sep.size();
    }
    return e;
}
// Extracts every URL contained in s, in order of appearance.
//
// Repeatedly uses url_beg() to find where the next URL starts and url_end()
// to find where it stops, copying each [start, stop) span into the result.
// Returns an empty vector when s contains no URL.
std::vector<std::string> find_urls(const std::string& s){
    typedef std::string::const_iterator iter;

    std::vector<std::string> urls;
    const iter last = s.end();

    for(iter cursor = s.begin(); cursor != last; ){
        cursor = url_beg(cursor, last);
        if(cursor == last)
            break;  // no further URL in the remaining text

        const iter stop = url_end(cursor, last);
        urls.push_back(std::string(cursor, stop));
        // Resume the search right after the URL just collected.
        cursor = stop;
    }
    return urls;
}