From e76834e58adfa84f71f60f5845bf2138ead62395 Mon Sep 17 00:00:00 2001 From: Ric Wright Date: Fri, 26 Aug 2016 14:09:20 -0500 Subject: [PATCH 1/6] Removed the annoying subversion folder/files that were checked in the with the google library, google-url --- ePub3/ThirdParty/google-url/.svn/all-wcprops | 35 - ePub3/ThirdParty/google-url/.svn/entries | 210 -- .../.svn/prop-base/LICENSE.txt.svn-base | 5 - .../.svn/prop-base/PRESUBMIT.py.svn-base | 5 - .../prop-base/PRESUBMIT_unittest.py.svn-base | 5 - .../.svn/prop-base/README.txt.svn-base | 5 - .../.svn/text-base/LICENSE.txt.svn-base | 65 - .../.svn/text-base/PRESUBMIT.py.svn-base | 108 - .../text-base/PRESUBMIT_unittest.py.svn-base | 88 - .../.svn/text-base/README.txt.svn-base | 185 -- .../text-base/codereview.settings.svn-base | 4 - .../google-url/base/.svn/all-wcprops | 47 - ePub3/ThirdParty/google-url/base/.svn/entries | 266 -- .../base/.svn/prop-base/README.txt.svn-base | 5 - .../base/.svn/prop-base/basictypes.h.svn-base | 5 - .../base/.svn/prop-base/logging.cc.svn-base | 5 - .../base/.svn/prop-base/logging.h.svn-base | 5 - .../base/.svn/prop-base/scoped_ptr.h.svn-base | 5 - .../base/.svn/prop-base/string16.cc.svn-base | 5 - .../base/.svn/prop-base/string16.h.svn-base | 5 - .../base/.svn/text-base/README.txt.svn-base | 2 - .../base/.svn/text-base/basictypes.h.svn-base | 88 - .../base/.svn/text-base/logging.cc.svn-base | 380 --- .../base/.svn/text-base/logging.h.svn-base | 482 ---- .../base/.svn/text-base/scoped_ptr.h.svn-base | 322 --- .../base/.svn/text-base/string16.cc.svn-base | 96 - .../base/.svn/text-base/string16.h.svn-base | 192 -- .../google-url/src/.svn/all-wcprops | 221 -- ePub3/ThirdParty/google-url/src/.svn/entries | 1252 ---------- .../src/.svn/prop-base/gurl.cc.svn-base | 5 - .../src/.svn/prop-base/gurl.h.svn-base | 5 - .../.svn/prop-base/gurl_test_main.cc.svn-base | 5 - .../.svn/prop-base/gurl_unittest.cc.svn-base | 5 - .../src/.svn/prop-base/url_canon.h.svn-base | 5 - .../.svn/prop-base/url_canon_etc.cc.svn-base | 5 - .../url_canon_filesystemurl.cc.svn-base | 5 - .../prop-base/url_canon_fileurl.cc.svn-base | 5 - .../.svn/prop-base/url_canon_host.cc.svn-base | 5 - .../.svn/prop-base/url_canon_icu.cc.svn-base | 5 - .../.svn/prop-base/url_canon_icu.h.svn-base | 5 - .../prop-base/url_canon_internal.cc.svn-base | 5 - .../prop-base/url_canon_internal.h.svn-base | 5 - .../url_canon_internal_file.h.svn-base | 5 - .../.svn/prop-base/url_canon_ip.cc.svn-base | 5 - .../.svn/prop-base/url_canon_ip.h.svn-base | 5 - .../prop-base/url_canon_mailtourl.cc.svn-base | 5 - .../.svn/prop-base/url_canon_path.cc.svn-base | 5 - .../prop-base/url_canon_pathurl.cc.svn-base | 5 - .../prop-base/url_canon_query.cc.svn-base | 5 - .../prop-base/url_canon_relative.cc.svn-base | 5 - .../prop-base/url_canon_stdstring.h.svn-base | 5 - .../prop-base/url_canon_stdurl.cc.svn-base | 5 - .../prop-base/url_canon_unittest.cc.svn-base | 5 - .../src/.svn/prop-base/url_common.h.svn-base | 5 - .../src/.svn/prop-base/url_file.h.svn-base | 5 - .../src/.svn/prop-base/url_parse.cc.svn-base | 5 - .../src/.svn/prop-base/url_parse.h.svn-base | 5 - .../.svn/prop-base/url_parse_file.cc.svn-base | 5 - .../prop-base/url_parse_internal.h.svn-base | 5 - .../prop-base/url_parse_unittest.cc.svn-base | 5 - .../.svn/prop-base/url_test_utils.h.svn-base | 5 - .../src/.svn/prop-base/url_util.cc.svn-base | 5 - .../src/.svn/prop-base/url_util.h.svn-base | 5 - .../prop-base/url_util_internal.h.svn-base | 5 - .../prop-base/url_util_unittest.cc.svn-base | 5 - .../src/.svn/text-base/gurl.cc.svn-base | 529 ---- .../src/.svn/text-base/gurl.h.svn-base | 392 --- .../.svn/text-base/gurl_test_main.cc.svn-base | 102 - .../.svn/text-base/gurl_unittest.cc.svn-base | 488 ---- .../src/.svn/text-base/url_canon.h.svn-base | 912 ------- .../.svn/text-base/url_canon_etc.cc.svn-base | 392 --- .../url_canon_filesystemurl.cc.svn-base | 158 -- .../text-base/url_canon_fileurl.cc.svn-base | 215 -- .../.svn/text-base/url_canon_host.cc.svn-base | 401 ---- .../.svn/text-base/url_canon_icu.cc.svn-base | 210 -- .../.svn/text-base/url_canon_icu.h.svn-base | 63 - .../text-base/url_canon_internal.cc.svn-base | 427 ---- .../text-base/url_canon_internal.h.svn-base | 461 ---- .../url_canon_internal_file.h.svn-base | 157 -- .../.svn/text-base/url_canon_ip.cc.svn-base | 730 ------ .../.svn/text-base/url_canon_ip.h.svn-base | 109 - .../text-base/url_canon_mailtourl.cc.svn-base | 137 -- .../.svn/text-base/url_canon_path.cc.svn-base | 378 --- .../text-base/url_canon_pathurl.cc.svn-base | 128 - .../text-base/url_canon_query.cc.svn-base | 189 -- .../text-base/url_canon_relative.cc.svn-base | 579 ----- .../text-base/url_canon_stdstring.h.svn-base | 134 -- .../text-base/url_canon_stdurl.cc.svn-base | 211 -- .../text-base/url_canon_unittest.cc.svn-base | 2133 ----------------- .../src/.svn/text-base/url_common.h.svn-base | 54 - .../src/.svn/text-base/url_file.h.svn-base | 108 - .../src/.svn/text-base/url_parse.cc.svn-base | 923 ------- .../src/.svn/text-base/url_parse.h.svn-base | 373 --- .../.svn/text-base/url_parse_file.cc.svn-base | 243 -- .../text-base/url_parse_internal.h.svn-base | 112 - .../text-base/url_parse_unittest.cc.svn-base | 649 ----- .../.svn/text-base/url_test_utils.h.svn-base | 78 - .../src/.svn/text-base/url_util.cc.svn-base | 618 ----- .../src/.svn/text-base/url_util.h.svn-base | 228 -- .../text-base/url_util_internal.h.svn-base | 56 - .../text-base/url_util_unittest.cc.svn-base | 310 --- .../google-url/third_party/.svn/all-wcprops | 5 - .../google-url/third_party/.svn/entries | 31 - 103 files changed, 17701 deletions(-) delete mode 100644 ePub3/ThirdParty/google-url/.svn/all-wcprops delete mode 100644 ePub3/ThirdParty/google-url/.svn/entries delete mode 100644 ePub3/ThirdParty/google-url/.svn/prop-base/LICENSE.txt.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT.py.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT_unittest.py.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/prop-base/README.txt.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/text-base/LICENSE.txt.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT.py.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT_unittest.py.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/text-base/README.txt.svn-base delete mode 100644 ePub3/ThirdParty/google-url/.svn/text-base/codereview.settings.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/all-wcprops delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/entries delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/README.txt.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/basictypes.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/scoped_ptr.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/README.txt.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/basictypes.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/logging.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/logging.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/scoped_ptr.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/string16.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/base/.svn/text-base/string16.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/all-wcprops delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/entries delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_test_main.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_etc.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_filesystemurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_fileurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_host.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal_file.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_mailtourl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_path.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_pathurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_query.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_relative.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdstring.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_common.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_file.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_file.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_internal.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_test_utils.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_internal.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_test_main.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_etc.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_filesystemurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_fileurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_host.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal_file.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_mailtourl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_path.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_pathurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_query.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_relative.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdstring.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdurl.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_common.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_file.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_file.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_internal.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_test_utils.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_internal.h.svn-base delete mode 100644 ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_unittest.cc.svn-base delete mode 100644 ePub3/ThirdParty/google-url/third_party/.svn/all-wcprops delete mode 100644 ePub3/ThirdParty/google-url/third_party/.svn/entries diff --git a/ePub3/ThirdParty/google-url/.svn/all-wcprops b/ePub3/ThirdParty/google-url/.svn/all-wcprops deleted file mode 100644 index 439be201b..000000000 --- a/ePub3/ThirdParty/google-url/.svn/all-wcprops +++ /dev/null @@ -1,35 +0,0 @@ -K 25 -svn:wc:ra_dav:version-url -V 23 -/svn/!svn/ver/181/trunk -END -LICENSE.txt -K 25 -svn:wc:ra_dav:version-url -V 33 -/svn/!svn/ver/2/trunk/LICENSE.txt -END -PRESUBMIT_unittest.py -K 25 -svn:wc:ra_dav:version-url -V 44 -/svn/!svn/ver/97/trunk/PRESUBMIT_unittest.py -END -PRESUBMIT.py -K 25 -svn:wc:ra_dav:version-url -V 35 -/svn/!svn/ver/97/trunk/PRESUBMIT.py -END -README.txt -K 25 -svn:wc:ra_dav:version-url -V 34 -/svn/!svn/ver/166/trunk/README.txt -END -codereview.settings -K 25 -svn:wc:ra_dav:version-url -V 43 -/svn/!svn/ver/160/trunk/codereview.settings -END diff --git a/ePub3/ThirdParty/google-url/.svn/entries b/ePub3/ThirdParty/google-url/.svn/entries deleted file mode 100644 index c6370c1b8..000000000 --- a/ePub3/ThirdParty/google-url/.svn/entries +++ /dev/null @@ -1,210 +0,0 @@ -10 - -dir -181 -http://google-url.googlecode.com/svn/trunk -http://google-url.googlecode.com/svn - - - -2012-12-20T21:49:49.616818Z -181 -brettw - - - - - - - - - - - - - - -8873c55e-713a-0410-88f8-23d9c3d90b1b - -build -dir - -LICENSE.txt -file - - - - -2013-01-16T16:12:52.000000Z -437ced1e9b232651b0912a9594da43b2 -2007-09-28T00:52:30.645870Z -2 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -3200 - -PRESUBMIT_unittest.py -file - - - - -2013-01-16T16:12:52.000000Z -3c5475af51397eb765f99344651a8be9 -2009-03-05T17:52:33.119165Z -97 -maruel@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -2468 - -PRESUBMIT.py -file - - - - -2013-01-16T16:12:52.000000Z -f250441a97520632fde21711b988e85b -2009-03-05T17:52:33.119165Z -97 -maruel@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -3371 - -src -dir - -third_party -dir - -base -dir - -README.txt -file - - - - -2013-01-16T16:12:52.000000Z -f298d924da4da368e095806905a3bb2e -2012-01-04T22:47:15.449237Z -166 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -8625 - -codereview.settings -file - - - - -2013-01-16T16:12:52.000000Z -8a41f612b58910f4251167a54f2e36bd -2011-11-04T20:02:49.002346Z -160 -maruel@chromium.org - - - - - - - - - - - - - - - - - - - - - -212 - diff --git a/ePub3/ThirdParty/google-url/.svn/prop-base/LICENSE.txt.svn-base b/ePub3/ThirdParty/google-url/.svn/prop-base/LICENSE.txt.svn-base deleted file mode 100644 index bdbd30518..000000000 --- a/ePub3/ThirdParty/google-url/.svn/prop-base/LICENSE.txt.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 6 -native -END diff --git a/ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT.py.svn-base b/ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT.py.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT.py.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT_unittest.py.svn-base b/ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT_unittest.py.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/.svn/prop-base/PRESUBMIT_unittest.py.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/.svn/prop-base/README.txt.svn-base b/ePub3/ThirdParty/google-url/.svn/prop-base/README.txt.svn-base deleted file mode 100644 index bdbd30518..000000000 --- a/ePub3/ThirdParty/google-url/.svn/prop-base/README.txt.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 6 -native -END diff --git a/ePub3/ThirdParty/google-url/.svn/text-base/LICENSE.txt.svn-base b/ePub3/ThirdParty/google-url/.svn/text-base/LICENSE.txt.svn-base deleted file mode 100644 index ac4083782..000000000 --- a/ePub3/ThirdParty/google-url/.svn/text-base/LICENSE.txt.svn-base +++ /dev/null @@ -1,65 +0,0 @@ -Copyright 2007, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -------------------------------------------------------------------------------- - -The file url_parse.cc is based on nsURLParsers.cc from Mozilla. This file is -licensed separately as follows: - -The contents of this file are subject to the Mozilla Public License Version -1.1 (the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" basis, -WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -for the specific language governing rights and limitations under the -License. - -The Original Code is mozilla.org code. - -The Initial Developer of the Original Code is -Netscape Communications Corporation. -Portions created by the Initial Developer are Copyright (C) 1998 -the Initial Developer. All Rights Reserved. - -Contributor(s): - Darin Fisher (original author) - -Alternatively, the contents of this file may be used under the terms of -either the GNU General Public License Version 2 or later (the "GPL"), or -the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), -in which case the provisions of the GPL or the LGPL are applicable instead -of those above. If you wish to allow use of your version of this file only -under the terms of either the GPL or the LGPL, and not to allow others to -use your version of this file under the terms of the MPL, indicate your -decision by deleting the provisions above and replace them with the notice -and other provisions required by the GPL or the LGPL. If you do not delete -the provisions above, a recipient may use your version of this file under -the terms of any one of the MPL, the GPL or the LGPL. diff --git a/ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT.py.svn-base b/ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT.py.svn-base deleted file mode 100644 index 6cfbe7492..000000000 --- a/ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT.py.svn-base +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2009 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -"""Top-level presubmit script for googleurl. - -See http://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts for -details on the presubmit API built into gcl. -""" - -# Files with these extensions will be considered source files -SOURCE_FILE_EXTENSIONS = [ - '.c', '.cc', '.cpp', '.h', '.m', '.mm', '.py', '.mk', '.am', '.json', -] -EXCLUDED_PATHS = [ - r".*third_party[\\\/].*", -] - -def ReadFile(path): - """Given a path, returns the full contents of the file. - - Reads files in binary format. - """ - fo = open(path, 'rb') - try: - contents = fo.read() - finally: - fo.close() - return contents - - -def CheckChangeOnUpload(input_api, output_api): - # TODO(brettw) Enforce 80 cols. - return LocalChecks(input_api, output_api, max_cols=0) - - -def CheckChangeOnCommit(input_api, output_api): - # TODO(brettw) Enforce 80 cols. - return (LocalChecks(input_api, output_api, max_cols=0) + - input_api.canned_checks.CheckDoNotSubmit(input_api, output_api)) - - -def LocalChecks(input_api, output_api, max_cols=80): - """Reports an error if for any source file in SOURCE_FILE_EXTENSIONS: - - uses CR (or CRLF) - - contains a TAB - - has a line that ends with whitespace - - contains a line >|max_cols| cols unless |max_cols| is 0. - - Note that the whole file is checked, not only the changes. - """ - cr_files = [] - results = [] - excluded_paths = [input_api.re.compile(x) for x in EXCLUDED_PATHS] - files = input_api.AffectedFiles() - for f in files: - path = f.LocalPath() - root, ext = input_api.os_path.splitext(path) - # Look for unsupported extensions. - if not ext in SOURCE_FILE_EXTENSIONS: - continue - # Look for excluded paths. - found = False - for item in excluded_paths: - if item.match(path): - found = True - break - if found: - continue - - # Need to read the file ourselves since AffectedFile.NewContents() - # will normalize line endings. - contents = ReadFile(path) - if '\r' in contents: - cr_files.append(path) - - local_errors = [] - # Remove EOL character. - lines = contents.splitlines() - line_num = 1 - for line in lines: - if line.endswith(' '): - local_errors.append(output_api.PresubmitError( - '%s, line %s ends with whitespaces.' % - (path, line_num))) - # Accept lines with http:// to exceed the max_cols rule. - if max_cols and len(line) > max_cols and not 'http://' in line: - local_errors.append(output_api.PresubmitError( - '%s, line %s has %s chars, please reduce to %d chars.' % - (path, line_num, len(line), max_cols))) - if '\t' in line: - local_errors.append(output_api.PresubmitError( - "%s, line %s contains a tab character." % - (path, line_num))) - line_num += 1 - # Just show the first 5 errors. - if len(local_errors) == 6: - local_errors.pop() - local_errors.append(output_api.PresubmitError("... and more.")) - break - results.extend(local_errors) - - if cr_files: - results.append(output_api.PresubmitError( - 'Found CR (or CRLF) line ending in these files, please use only LF:', - items=cr_files)) - return results diff --git a/ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT_unittest.py.svn-base b/ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT_unittest.py.svn-base deleted file mode 100644 index 19231db57..000000000 --- a/ePub3/ThirdParty/google-url/.svn/text-base/PRESUBMIT_unittest.py.svn-base +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2009 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -"""Unit tests for top-level Chromium presubmit script. -""" - - -import os -import PRESUBMIT -import re -import unittest - - -class MockInputApi(object): - def __init__(self): - self.affected_files = [] - self.re = re - self.os_path = os.path - - def AffectedFiles(self): - return self.affected_files - - def AffectedTextFiles(self, include_deletes=True): - return self.affected_files - - -class MockAffectedFile(object): - def __init__(self, path): - self.path = path - - def LocalPath(self): - return self.path - - -class MockOutputApi(object): - class PresubmitError(object): - def __init__(self, msg, items=[], long_text=''): - self.msg = msg - self.items = items - - -class PresubmitUnittest(unittest.TestCase): - def setUp(self): - self.file_contents = '' - def MockReadFile(path): - self.failIf(path.endswith('notsource')) - return self.file_contents - self._ReadFile = PRESUBMIT.ReadFile - PRESUBMIT.ReadFile = MockReadFile - - def tearDown(self): - PRESUBMIT.ReadFile = self._ReadFile - - def testLocalChecks(self): - api = MockInputApi() - api.affected_files = [ - MockAffectedFile('foo/blat/yoo.notsource'), - MockAffectedFile('third_party/blat/source.cc'), - MockAffectedFile('foo/blat/source.h'), - MockAffectedFile('foo/blat/source.mm'), - MockAffectedFile('foo/blat/source.py'), - ] - self.file_contents = 'file with \n\terror\nhere\r\nyes there' - # 3 source files, 2 errors by file + 1 global CR error. - self.failUnless(len(PRESUBMIT.LocalChecks(api, MockOutputApi)) == 7) - - self.file_contents = 'file\twith\ttabs' - # 3 source files, 1 error by file. - self.failUnless(len(PRESUBMIT.LocalChecks(api, MockOutputApi)) == 3) - - self.file_contents = 'file\rusing\rCRs' - # One global CR error. - self.failUnless(len(PRESUBMIT.LocalChecks(api, MockOutputApi)) == 1) - self.failUnless( - len(PRESUBMIT.LocalChecks(api, MockOutputApi)[0].items) == 3) - - self.file_contents = 'both\ttabs and\r\nCRLF' - # 3 source files, 1 error by file + 1 global CR error. - self.failUnless(len(PRESUBMIT.LocalChecks(api, MockOutputApi)) == 4) - - self.file_contents = 'file with\nzero \\t errors \\r\\n' - self.failIf(PRESUBMIT.LocalChecks(api, MockOutputApi)) - - -if __name__ == '__main__': - unittest.main() diff --git a/ePub3/ThirdParty/google-url/.svn/text-base/README.txt.svn-base b/ePub3/ThirdParty/google-url/.svn/text-base/README.txt.svn-base deleted file mode 100644 index d5f79a358..000000000 --- a/ePub3/ThirdParty/google-url/.svn/text-base/README.txt.svn-base +++ /dev/null @@ -1,185 +0,0 @@ - ============================== - The Google URL Parsing Library - ============================== - -This is the Google URL Parsing Library which parses and canonicalizes URLs. -Please see the LICENSE.txt file for licensing information. - -Features -======== - - * Easily embeddable: This library was written for a variety of client and - server programs in mind, so unlike most implementations of URL parsing - and canonicalization, it can be easily emdedded. - - * Fast: hundreds of thousands of typical URLs can be parsed and - canonicalized per second on a modern CPU. It is much faster than, for - example, calling WinInet's corresponding functions. - - * Compatible: When possible, this library has strived for IE7 compatability - for both general web compatability, and so IE addons or other applications - that communicate with or embed IE will work properly. - - It supports Unix-style file URLs, as well as the more complex rules for - Window file URLs. Note that total compatability is not possible (for - example, IE6 and IE7 disagree about how to parse certain IP addresses), - and that this is more strict about certain illegal, rarely used, and - potentially dangerous constructs such as escaped control characters in - host names that IE will allow. It is typically a little less strict than - Firefox. - - -Example -======= - -An example implementation of a URL object that uses this library is provided -in src/gurl.*. This implementation uses the "application integration" layer -discussed below to interface with the low-level parsing and canonicalization -functions. - - -Building -======== - -The canonicalization files require ICU for some UTF-8 and UTF-16 conversion -macros. If your project does not use ICU, it should be straightforward to -factor out the macros and functions used in ICU, there are only a few well- -isolated things that are used. - -TODO(brettw) ADD INSTRUCTIONS FOR GETTING ICU HERE! - -logging.h and logging.cc are Windows-only because the corresponding Unix -logging system has many dependencies. This library uses few of the logging -macros, and a dummy header can easily be written that defines the -appropriate things for Unix. - - -Definitions -=========== - -"Standard URL": A URL with an "authority", which is a hostname and optionally - a port, username, and password. Most URLs are standard such as HTTP and FTP. - -"File URL": A URL that references a file on disk. There are special rules for - this type of URL. Note that it may have a hostname! "localhost" is allowed, - for example "file://localhost/foo" is the same as "file:///foo". - -"FileSystem URL": A URL referring to a file reached via the FileSystem API - described at http://www.w3.org/TR/file-system-api/. These are nested URLs, - with compound schemes of e.g. "filesystem:file:" or "filesystem:https:". - Parsed FileSystem URLs will have a nested inner_parsed() object containing - information about the inner URL. - -"Path URL": This is everything else. There is no standard on how to treat these - URLs, or even what they are called. This library decomposes them into a - scheme and a path. The path is everything following the scheme. This type of - URL includes "javascript", "data", and even "mailto" (although "mailto" - might look like a standard scheme in some respects, it is not). - -Design -====== - -The library is divided into four layers. They are listed here from the lowest -to the highest; you can use any portion of the library as long as you embed the -layers below it. - -1. Parsing ----------- -At the lowest level is the parsing code. The files encompassing this are -url_parse.* and the main include file is src/url_parse.h. This code will, given -an input string, parse it into the most likely form of a URL. - -Parsing cannot fail and does no validation. The exception is the port number, -which it currently validates, but this is a bug. Given crazy input, the parser -will do its best to find the various URL components according to its rules (see -url_parse_unittest.cc for some examples). - -To use this, an application will typically use ExtractScheme to determine the -type of a given input URL, and then call one of the initialization functions: -"ParseStandardURL", "ParsePathURL", or "ParseFileURL". This will result in -a "Parsed" structure which identifies the substrings of each identified -component. - -2. Canonicalization -------------------- -At the next highest level is canonicalization. The files encompasing this are -url_canon.* and the main include file is src/url_canon.h. This code will -validate an already-parsed URL, and will convert it to a canonical form. For -example, this will convert host names to lowercase, convert IP addresses -into dotted-decimal notation, handle encoding issues, etc. - -This layer will always do its best to produce a reasonable output string, but -it may return that the string is invalid. For example, if there are invalid -characters in the host name, it will escape them or replace them with the -Unicode "invalid character" character, but will fail. This way, the program can -display error messages to the user with the output, log it, etc. and the -string will have some meaning. - -Canonicalized output is written to a CanonOutput object which is a simple -wrapper around an expanding buffer. An implementation called RawCanonOutput is -proivided that writes to a raw buffer with a fixed amount statically allocated -(for performance). Applications using STL can use StdStringCanonOutput defined -in url_canon_stdstring.h which writes into a std::string. - -A normal application would call one of the four high-level functions -"CanonicalizeStandardURL", "CanonicalizeFileURL", "CanonicalizeFileSystemURL", -and CanonicalizePathURL" depending on the type of URL in question. Lower-level -functions are also provided which will canonicalize individual parts of a URL -(for example, "CanonicalizeHost"). - -Part of this layer is the integration with the host system for IDN and encoding -conversion. An implementation that provides integration with the ICU -(http://www-306.ibm.com/software/globalization/icu/index.jsp) is provided in -src/url_canon_icu.cc. The embedder may wish to replace this file with -implementations of the functions for their own IDN library if they do not use -ICU. - -3. Application integration --------------------------- -The canonicalization and parsing layers do not know anything about the URI -schemes supported by your application. The parsing and canonicalization -functions are very low-level, and you must call the correct function to do the -work (for example, "CanonicalizeFileURL"). - -The application integration in url_util.* provides wrappers around the -low-level parsing and canonicalization to call the correct versions for -different identified schemes. Embedders will want to modify this file if -necessary to suit the needs of their application. - -4. URL object -------------- -The highest level is the "URL" object that a C++ application would use to -to encapsulate a URL. Embedders will typically want to provide their own URL -object that meets the requirements of their system. A reasonably complete -example implemnetation is provided in src/gurl.*. You may wish to use this -object, extend or modify it, or write your own. - -Whitespace ----------- -Sometimes, you may want to remove linefeeds and tabs from the content of a URL. -Some web pages, for example, expect that a URL spanning two lines should be -treated as one with the newline removed. Depending on the source of the URLs -you are canonicalizing, these newlines may or may not be trimmed off. - -If you want this behavior, call RemoveURLWhitespace before parsing. This will -remove CR, LF and TAB from the input. Note that it preserves spaces. On typical -URLs, this function produces a 10-15% speed reduction, so it is optional and -not done automatically. The example GURL object and the url_util wrapper does -this for you. - -Tests -===== - -There are a number of *_unittest.cc and *_perftest.cc files. These files are -not currently compilable as they rely on a not-included unit testing framework -Tests are declared like this: - TEST(TestCaseName, TestName) { - ASSERT_TRUE(a); - EXPECT_EQ(a, b); - } -If you would like to compile them, it should be straightforward to define -the TEST macro (which would declare a function by combining the two arguments) -and the other macros whose behavior should be self-explanatory (EXPECT is like -an ASSERT, but does not stop the test, if you are doing this, you probably -don't care about this difference). Then you would define a .cc file that -calls all of these functions. diff --git a/ePub3/ThirdParty/google-url/.svn/text-base/codereview.settings.svn-base b/ePub3/ThirdParty/google-url/.svn/text-base/codereview.settings.svn-base deleted file mode 100644 index 94d5b0ebe..000000000 --- a/ePub3/ThirdParty/google-url/.svn/text-base/codereview.settings.svn-base +++ /dev/null @@ -1,4 +0,0 @@ -# This file is used by gcl to get repository specific information. -CODE_REVIEW_SERVER: codereview.chromium.org -CC_LIST: chromium-reviews@chromium.org -VIEW_VC: http://code.google.com/p/google-url/source/detail?r= diff --git a/ePub3/ThirdParty/google-url/base/.svn/all-wcprops b/ePub3/ThirdParty/google-url/base/.svn/all-wcprops deleted file mode 100644 index 8eb11558a..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/all-wcprops +++ /dev/null @@ -1,47 +0,0 @@ -K 25 -svn:wc:ra_dav:version-url -V 28 -/svn/!svn/ver/177/trunk/base -END -string16.cc -K 25 -svn:wc:ra_dav:version-url -V 40 -/svn/!svn/ver/177/trunk/base/string16.cc -END -string16.h -K 25 -svn:wc:ra_dav:version-url -V 38 -/svn/!svn/ver/99/trunk/base/string16.h -END -scoped_ptr.h -K 25 -svn:wc:ra_dav:version-url -V 40 -/svn/!svn/ver/97/trunk/base/scoped_ptr.h -END -logging.cc -K 25 -svn:wc:ra_dav:version-url -V 38 -/svn/!svn/ver/97/trunk/base/logging.cc -END -README.txt -K 25 -svn:wc:ra_dav:version-url -V 37 -/svn/!svn/ver/2/trunk/base/README.txt -END -basictypes.h -K 25 -svn:wc:ra_dav:version-url -V 41 -/svn/!svn/ver/119/trunk/base/basictypes.h -END -logging.h -K 25 -svn:wc:ra_dav:version-url -V 37 -/svn/!svn/ver/97/trunk/base/logging.h -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/entries b/ePub3/ThirdParty/google-url/base/.svn/entries deleted file mode 100644 index e5a223b47..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/entries +++ /dev/null @@ -1,266 +0,0 @@ -10 - -dir -181 -http://google-url.googlecode.com/svn/trunk/base -http://google-url.googlecode.com/svn - - - -2012-08-20T17:29:48.279056Z -177 -brettw - - - - - - - - - - - - - - -8873c55e-713a-0410-88f8-23d9c3d90b1b - -string16.cc -file - - - - -2013-01-16T16:12:52.000000Z -dc63772b8a3d34d4877acf9e399cd306 -2012-08-20T17:29:48.279056Z -177 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -3011 - -string16.h -file - - - - -2013-01-16T16:12:52.000000Z -737b6cd7b947d487881028c9b9bf44e6 -2009-03-25T16:28:58.981467Z -99 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -7458 - -scoped_ptr.h -file - - - - -2013-01-16T16:12:52.000000Z -d0d1244aee87eedc34a7141de825fbd9 -2009-03-05T17:52:33.119165Z -97 -maruel@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -6945 - -logging.cc -file - - - - -2013-01-16T16:12:52.000000Z -7055b57b38a3f1c131a0b5a65248832a -2009-03-05T17:52:33.119165Z -97 -maruel@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -12965 - -README.txt -file - - - - -2013-01-16T16:12:52.000000Z -02dd67f358bd9c33802576f7f708117d -2007-09-28T00:52:30.645870Z -2 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -121 - -basictypes.h -file - - - - -2013-01-16T16:12:52.000000Z -1609fd06729cb0e9233934757c528765 -2009-09-28T20:28:00.980603Z -119 -brettw@gmail.com -has-props - - - - - - - - - - - - - - - - - - - - -3606 - -logging.h -file - - - - -2013-01-16T16:12:52.000000Z -f5046e46c8bb5605c4ec10f5f41a57cc -2009-03-05T17:52:33.119165Z -97 -maruel@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -19272 - diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/README.txt.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/README.txt.svn-base deleted file mode 100644 index bdbd30518..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/README.txt.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 6 -native -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/basictypes.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/basictypes.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/basictypes.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.cc.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/logging.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/scoped_ptr.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/scoped_ptr.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/scoped_ptr.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.cc.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/prop-base/string16.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/README.txt.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/README.txt.svn-base deleted file mode 100644 index 311faa0d6..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/README.txt.svn-base +++ /dev/null @@ -1,2 +0,0 @@ -These files contain some shared code. You can define your own assertion macros -to eliminate the dependency on logging.h. diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/basictypes.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/basictypes.h.svn-base deleted file mode 100644 index b0c404d1d..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/basictypes.h.svn-base +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2001 - 2003 Google Inc. All Rights Reserved - -#ifndef BASE_BASICTYPES_H__ -#define BASE_BASICTYPES_H__ - -typedef unsigned char uint8; -typedef unsigned short uint16; -typedef unsigned int uint32; - -const uint8 kuint8max = (( uint8) 0xFF); -const uint32 kuint32max = ((uint32) 0xFFFFFFFF); - -// The arraysize(arr) macro returns the # of elements in an array arr. -// The expression is a compile-time constant, and therefore can be -// used in defining new arrays, for example. If you use arraysize on -// a pointer by mistake, you will get a compile-time error. -// -// One caveat is that arraysize() doesn't accept any array of an -// anonymous type or a type defined inside a function. In these rare -// cases, you have to use the unsafe ARRAYSIZE() macro below. This is -// due to a limitation in C++'s template system. The limitation might -// eventually be removed, but it hasn't happened yet. - -// This template function declaration is used in defining arraysize. -// Note that the function doesn't need an implementation, as we only -// use its type. -template -char (&ArraySizeHelper(T (&array)[N]))[N]; - -// That gcc wants both of these prototypes seems mysterious. VC, for -// its part, can't decide which to use (another mystery). Matching of -// template overloads: the final frontier. -#ifndef _MSC_VER -template -char (&ArraySizeHelper(const T (&array)[N]))[N]; -#endif - -#define arraysize(array) (sizeof(ArraySizeHelper(array))) - -// ARRAYSIZE performs essentially the same calculation as arraysize, -// but can be used on anonymous types or types defined inside -// functions. It's less safe than arraysize as it accepts some -// (although not all) pointers. Therefore, you should use arraysize -// whenever possible. -// -// The expression ARRAYSIZE(a) is a compile-time constant of type -// size_t. -// -// ARRAYSIZE catches a few type errors. If you see a compiler error -// -// "warning: division by zero in ..." -// -// when using ARRAYSIZE, you are (wrongfully) giving it a pointer. -// You should only use ARRAYSIZE on statically allocated arrays. -// -// The following comments are on the implementation details, and can -// be ignored by the users. -// -// ARRAYSIZE(arr) works by inspecting sizeof(arr) (the # of bytes in -// the array) and sizeof(*(arr)) (the # of bytes in one array -// element). If the former is divisible by the latter, perhaps arr is -// indeed an array, in which case the division result is the # of -// elements in the array. Otherwise, arr cannot possibly be an array, -// and we generate a compiler error to prevent the code from -// compiling. -// -// Since the size of bool is implementation-defined, we need to cast -// !(sizeof(a) & sizeof(*(a))) to size_t in order to ensure the final -// result has type size_t. -// -// This macro is not perfect as it wrongfully accepts certain -// pointers, namely where the pointer size is divisible by the pointee -// size. Since all our code has to go through a 32-bit compiler, -// where a pointer is 4 bytes, this means all pointers to a type whose -// size is 3 or greater than 4 will be (righteously) rejected. -// -// Starting with Visual C++ 2005, WinNT.h includes ARRAYSIZE. -#define ARRAYSIZE_UNSAFE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) - -// A macro to disallow the evil copy constructor and operator= functions -// This should be used in the private: declarations for a class -#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) - -#endif // BASE_BASICTYPES_H__ diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/logging.cc.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/logging.cc.svn-base deleted file mode 100644 index ab03150e0..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/logging.cc.svn-base +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include -#include -#include -#include -#include "base/logging.h" - -namespace logging { - -const char* const log_severity_names[LOG_NUM_SEVERITIES] = { - "INFO", "WARNING", "ERROR", "FATAL" }; - -int min_log_level = 0; -LogLockingState lock_log_file = LOCK_LOG_FILE; -LoggingDestination logging_destination = LOG_ONLY_TO_FILE; - -const int kMaxFilteredLogLevel = LOG_WARNING; -char* log_filter_prefix = NULL; - -// which log file to use? This is initialized by InitLogging or -// will be lazily initialized to the default value when it is -// first needed. -TCHAR log_file_name[MAX_PATH] = { 0 }; - -// this file is lazily opened and the handle may be NULL -HANDLE log_file = NULL; - -// what should be prepended to each message? -bool log_process_id = false; -bool log_thread_id = false; -bool log_timestamp = true; -bool log_tickcount = false; - -// An assert handler override specified by the client to be called instead of -// the debug message dialog. -LogAssertHandlerFunction log_assert_handler = NULL; - -// The critical section is used if log file locking is false. It helps us -// avoid problems with multiple threads writing to the log file at the same -// time. -bool initialized_critical_section = false; -CRITICAL_SECTION log_critical_section; - -// When we don't use a critical section, we are using a global mutex. We -// need to do this because LockFileEx is not thread safe -HANDLE log_mutex = NULL; - -// Called by logging functions to ensure that debug_file is initialized -// and can be used for writing. Returns false if the file could not be -// initialized. debug_file will be NULL in this case. -bool InitializeLogFileHandle() { - if (log_file) - return true; - - if (!log_file_name[0]) { - // nobody has called InitLogging to specify a debug log file, so here we - // initialize the log file name to the default - GetModuleFileName(NULL, log_file_name, MAX_PATH); - TCHAR* last_backslash = _tcsrchr(log_file_name, '\\'); - if (last_backslash) - last_backslash[1] = 0; // name now ends with the backslash - _tcscat_s(log_file_name, _T("debug.log")); - } - - log_file = CreateFile(log_file_name, GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, - OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); - if (log_file == INVALID_HANDLE_VALUE || log_file == NULL) { - // try the current directory - log_file = CreateFile(_T(".\\debug.log"), GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, - OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); - if (log_file == INVALID_HANDLE_VALUE || log_file == NULL) { - log_file = NULL; - return false; - } - } - SetFilePointer(log_file, 0, 0, FILE_END); - return true; -} - -void InitLogMutex() { - if (!log_mutex) { - // \ is not a legal character in mutex names so we replace \ with / - std::wstring safe_name(log_file_name); - std::replace(safe_name.begin(), safe_name.end(), '\\', '/'); - std::wstring t(L"Global\\"); - t.append(safe_name); - log_mutex = ::CreateMutex(NULL, FALSE, t.c_str()); - } -} - -void InitLogging(const TCHAR* new_log_file, LoggingDestination logging_dest, - LogLockingState lock_log, OldFileDeletionState delete_old) { - if (log_file) { - // calling InitLogging twice or after some log call has already opened the - // default log file will re-initialize to the new options - CloseHandle(log_file); - log_file = NULL; - } - - lock_log_file = lock_log; - logging_destination = logging_dest; - - // ignore file options if logging is only to system - if (logging_destination == LOG_ONLY_TO_SYSTEM_DEBUG_LOG) - return; - - _tcscpy_s(log_file_name, MAX_PATH, new_log_file); - if (delete_old == DELETE_OLD_LOG_FILE) - DeleteFile(log_file_name); - - if (lock_log_file == LOCK_LOG_FILE) { - InitLogMutex(); - } else if (!initialized_critical_section) { - // initialize the critical section - InitializeCriticalSection(&log_critical_section); - initialized_critical_section = true; - } - - InitializeLogFileHandle(); -} - -void SetMinLogLevel(int level) { - min_log_level = level; -} - -void SetLogFilterPrefix(char* filter) { - if (log_filter_prefix) { - delete[] log_filter_prefix; - log_filter_prefix = NULL; - } - - if (filter) { - size_t size = strlen(filter)+1; - log_filter_prefix = new char[size]; - strcpy_s(log_filter_prefix, size, filter); - } -} - -void SetLogItems(bool enable_process_id, bool enable_thread_id, - bool enable_timestamp, bool enable_tickcount) { - log_process_id = enable_process_id; - log_thread_id = enable_thread_id; - log_timestamp = enable_timestamp; - log_tickcount = enable_tickcount; -} - -void SetLogAssertHandler(LogAssertHandlerFunction handler) { - log_assert_handler = handler; -} - -// Displays a message box to the user with the error message in it. For -// Windows programs, it's possible that the message loop is messed up on -// a fatal error, and creating a MessageBox will cause that message loop -// to be run. Instead, we try to spawn another process that displays its -// command line. We look for "Debug Message.exe" in the same directory as -// the application. If it exists, we use it, otherwise, we use a regular -// message box. -void DisplayDebugMessage(const std::string& str) { - if (str.empty()) - return; - - // look for the debug dialog program next to our application - wchar_t prog_name[MAX_PATH]; - GetModuleFileNameW(NULL, prog_name, MAX_PATH); - wchar_t* backslash = wcsrchr(prog_name, '\\'); - if (backslash) - backslash[1] = 0; - wcscat_s(prog_name, MAX_PATH, L"debug_message.exe"); - - // stupid CreateProcess requires a non-const command line and may modify it. - // We also want to use the wide string - int charcount = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0); - if (!charcount) - return; - scoped_array cmdline(new wchar_t[charcount]); - if (!MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, cmdline.get(), charcount)) - return; - - STARTUPINFO startup_info; - memset(&startup_info, 0, sizeof(startup_info)); - startup_info.cb = sizeof(startup_info); - - PROCESS_INFORMATION process_info; - if (CreateProcessW(prog_name, cmdline.get(), NULL, NULL, false, 0, NULL, - NULL, &startup_info, &process_info)) { - WaitForSingleObject(process_info.hProcess, INFINITE); - CloseHandle(process_info.hThread); - CloseHandle(process_info.hProcess); - } else { - // debug process broken, let's just do a message box - MessageBoxW(NULL, cmdline.get(), L"Fatal error", MB_OK | MB_ICONHAND); - } -} - -LogMessage::LogMessage(const char* file, int line, LogSeverity severity, - int ctr) - : severity_(severity) { - Init(file, line); -} - -LogMessage::LogMessage(const char* file, int line, const CheckOpString& result) - : severity_(LOG_FATAL) { - Init(file, line); - stream_ << "Check failed: " << (*result.str_); -} - -LogMessage::LogMessage(const char* file, int line) - : severity_(LOG_INFO) { - Init(file, line); -} - -LogMessage::LogMessage(const char* file, int line, LogSeverity severity) - : severity_(severity) { - Init(file, line); -} - -// writes the common header info to the stream -void LogMessage::Init(const char* file, int line) { - // log only the filename - const char* last_slash = strrchr(file, '\\'); - if (last_slash) - file = last_slash + 1; - - stream_ << '['; - if (log_process_id) - stream_ << GetCurrentProcessId() << ':'; - if (log_thread_id) - stream_ << GetCurrentThreadId() << ':'; - if (log_timestamp) { - time_t t = time(NULL); - struct tm tm_time; - localtime_s(&tm_time, &t); - stream_ << std::setfill('0') - << std::setw(2) << 1 + tm_time.tm_mon - << std::setw(2) << tm_time.tm_mday - << '/' - << std::setw(2) << tm_time.tm_hour - << std::setw(2) << tm_time.tm_min - << std::setw(2) << tm_time.tm_sec - << ':'; - } - if (log_tickcount) - stream_ << GetTickCount() << ':'; - stream_ << log_severity_names[severity_] << ":" << file << "(" << line << ")] "; - - message_start_ = stream_.pcount(); -} - -LogMessage::~LogMessage() { - if (severity_ < min_log_level) - return; - - std::string str_newline(stream_.str(), stream_.pcount()); - str_newline.append("\r\n"); - - if (log_filter_prefix && severity_ <= kMaxFilteredLogLevel && - str_newline.compare(message_start_, strlen(log_filter_prefix), - log_filter_prefix) != 0) { - goto cleanup; - } - - if (logging_destination != LOG_ONLY_TO_FILE) - OutputDebugStringA(str_newline.c_str()); - - // write to log file - if (logging_destination != LOG_ONLY_TO_SYSTEM_DEBUG_LOG && - InitializeLogFileHandle()) { - // we can have multiple threads and/or processes, so try to prevent them from - // clobbering each other's writes - if (lock_log_file == LOCK_LOG_FILE) { - // Ensure that the mutex is initialized in case the client app did not - // call InitLogging. This is not thread safe. See below - InitLogMutex(); - - DWORD r = ::WaitForSingleObject(log_mutex, INFINITE); - DCHECK(r != WAIT_ABANDONED); - } else { - // use the critical section - if (!initialized_critical_section) { - // The client app did not call InitLogging, and so the critical section - // has not been created. We do this on demand, but if two threads try to - // do this at the same time, there will be a race condition to create - // the critical section. This is why InitLogging should be called from - // the main thread at the beginning of execution. - InitializeCriticalSection(&log_critical_section); - initialized_critical_section = true; - } - EnterCriticalSection(&log_critical_section); - } - - SetFilePointer(log_file, 0, 0, SEEK_END); - DWORD num_written; - WriteFile(log_file, (void*)str_newline.c_str(), (DWORD)str_newline.length(), &num_written, NULL); - - if (lock_log_file == LOCK_LOG_FILE) { - ReleaseMutex(log_mutex); - } else { - LeaveCriticalSection(&log_critical_section); - } - } - - if (severity_ == LOG_FATAL) { - // display a message or break into the debugger on a fatal error - if (::IsDebuggerPresent()) { - DebugBreak(); - } else { - if (log_assert_handler) { - log_assert_handler(std::string(stream_.str(), stream_.pcount())); - } else { - // don't use the string with the newline, get a fresh version to send to - // the debug message process - DisplayDebugMessage(std::string(stream_.str(), stream_.pcount())); - TerminateProcess(GetCurrentProcess(), 1); - } - } - } - -cleanup: - // Calling stream_.str() freezes the stream buffer. A frozen buffer will - // not be freed during strstreambuf destruction. - stream_.freeze(false); -} - -void CloseLogFile() { - if (!log_file) - return; - - CloseHandle(log_file); - log_file = NULL; -} - -} // namespace logging - -std::ostream& operator<<(std::ostream& out, const wchar_t* wstr) { - if (!wstr || !wstr[0]) - return out; - - // compute the length of the buffer we'll need - int charcount = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, - NULL, 0, NULL, NULL); - if (charcount == 0) - return out; - - // convert - scoped_array buf(new char[charcount]); - WideCharToMultiByte(CP_UTF8, 0, wstr, -1, buf.get(), charcount, NULL, NULL); - return out << buf.get(); -} diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/logging.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/logging.h.svn-base deleted file mode 100644 index 5353b59bd..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/logging.h.svn-base +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2006 Google Inc. All Rights Reserved. -// Author: brettw (Brett Wilson) - -#ifndef BASE_LOGGING_H__ -#define BASE_LOGGING_H__ - -#include -#include -#include -#include - -#include "base/basictypes.h" -#include "base/scoped_ptr.h" - -// Optional message capabilities -// ----------------------------- -// Assertion failed messages and fatal errors are displayed in a dialog box -// before the application exits. However, running this UI creates a message -// loop, which causes application messages to be processed and potentially -// dispatched to existing application windows. Since the application is in a -// bad state when this assertion dialog is displayed, these messages may not -// get processed and hang the dialog, or the application might go crazy. -// -// Therefore, it can be beneficial to display the error dialog in a separate -// process from the main application. When the logging system needs to display -// a fatal error dialog box, it will look for a program called -// "DebugMessage.exe" in the same directory as the application executable. It -// will run this application with the message as the command line, and will -// not include the name of the application as is traditional for easier -// parsing. -// -// The code for DebugMessage.exe is only one line. In WinMain, do: -// MessageBox(NULL, GetCommandLineW(), L"Fatal Error", 0); -// -// If DebugMessage.exe is not found, the logging code will use a normal -// MessageBox, potentially causing the problems discussed above. - - -// Instructions -// ------------ -// -// Make a bunch of macros for logging. The way to log things is to stream -// things to LOG(). E.g., -// -// LOG(INFO) << "Found " << num_cookies << " cookies"; -// -// You can also do conditional logging: -// -// LOG_IF(INFO, num_cookies > 10) << "Got lots of cookies"; -// -// The above will cause log messages to be output on the 1st, 11th, 21st, ... -// times it is executed. Note that the special COUNTER value is used to -// identify which repetition is happening. -// -// There are also "debug mode" logging macros like the ones above: -// -// DLOG(INFO) << "Found cookies"; -// -// DLOG_IF(INFO, num_cookies > 10) << "Got lots of cookies"; -// -// All "debug mode" logging is compiled away to nothing for non-debug mode -// compiles. LOG_IF and development flags also work well together -// because the code can be compiled away sometimes. -// -// We also have -// -// LOG_ASSERT(assertion); -// DLOG_ASSERT(assertion); -// -// which is syntactic sugar for {,D}LOG_IF(FATAL, assert fails) << assertion; -// -// We also override the standard 'assert' to use 'DLOG_ASSERT'. -// -// The supported severity levels for macros that allow you to specify one -// are (in increasing order of severity) INFO, WARNING, ERROR, and FATAL. -// -// There is also the special severity of DFATAL, which logs FATAL in -// debug mode, ERROR in normal mode. -// -// Very important: logging a message at the FATAL severity level causes -// the program to terminate (after the message is logged). - -namespace logging { - -// Where to record logging output? A flat file and/or system debug log via -// OutputDebugString. Defaults to LOG_ONLY_TO_FILE. -enum LoggingDestination { LOG_ONLY_TO_FILE, - LOG_ONLY_TO_SYSTEM_DEBUG_LOG, - LOG_TO_BOTH_FILE_AND_SYSTEM_DEBUG_LOG }; - -// Indicates that the log file should be locked when being written to. -// Often, there is no locking, which is fine for a single threaded program. -// If logging is being done from multiple threads or there can be more than -// one process doing the logging, the file should be locked during writes to -// make each log outut atomic. Other writers will block. -// -// All processes writing to the log file must have their locking set for it to -// work properly. Defaults to DONT_LOCK_LOG_FILE. -enum LogLockingState { LOCK_LOG_FILE, DONT_LOCK_LOG_FILE }; - -// On startup, should we delete or append to an existing log file (if any)? -// Defaults to APPEND_TO_OLD_LOG_FILE. -enum OldFileDeletionState { DELETE_OLD_LOG_FILE, APPEND_TO_OLD_LOG_FILE }; - -// Sets the log file name and other global logging state. Calling this function -// is recommended, and is normally done at the beginning of application init. -// If you don't call it, all the flags will be initialized to their default -// values, and there is a race condition that may leak a critical section -// object if two threads try to do the first log at the same time. -// See the definition of the enums above for descriptions and default values. -// -// The default log file is initialized to "debug.log" in the application -// directory. You probably don't want this, especially since the program -// directory may not be writable on an enduser's system. -void InitLogging(const TCHAR* log_file, LoggingDestination logging_dest, - LogLockingState lock_log, OldFileDeletionState delete_old); - -// Sets the log level. Anything at or above this level will be written to the -// log file/displayed to the user (if applicable). Anything below this level -// will be silently ignored. The log level defaults to 0 (everything is logged) -// if this function is not called. -void SetMinLogLevel(int level); - -// Sets the log filter prefix. Any log message below LOG_ERROR severity that -// doesn't start with this prefix with be silently ignored. The filter defaults -// to NULL (everything is logged) if this function is not called. Messages -// with severity of LOG_ERROR or higher will not be filtered. -void SetLogFilterPrefix(char* filter); - -// Sets the common items you want to be prepended to each log message. -// process and thread IDs default to off, the timestamp defaults to on. -// If this function is not called, logging defaults to writing the timestamp -// only. -void SetLogItems(bool enable_process_id, bool enable_thread_id, - bool enable_timestamp, bool enable_tickcount); - -// Sets the Log Assert Handler that will be used to notify of check failures. -// The default handler shows a dialog box, however clients can use this -// function to override with their own handling (e.g. a silent one for Unit -// Tests) -typedef void (*LogAssertHandlerFunction)(const std::string& str); -void SetLogAssertHandler(LogAssertHandlerFunction handler); - -typedef int LogSeverity; -const LogSeverity LOG_INFO = 0; -const LogSeverity LOG_WARNING = 1; -const LogSeverity LOG_ERROR = 2; -const LogSeverity LOG_FATAL = 3; -const LogSeverity LOG_NUM_SEVERITIES = 4; - -// LOG_DFATAL_LEVEL is LOG_FATAL in debug mode, ERROR in normal mode -#ifdef NDEBUG -const LogSeverity LOG_DFATAL_LEVEL = LOG_ERROR; -#else -const LogSeverity LOG_DFATAL_LEVEL = LOG_FATAL; -#endif - -// A few definitions of macros that don't generate much code. These are used -// by LOG() and LOG_IF, etc. Since these are used all over our code, it's -// better to have compact code for these operations. -#define COMPACT_GOOGLE_LOG_INFO \ - logging::LogMessage(__FILE__, __LINE__) -#define COMPACT_GOOGLE_LOG_WARNING \ - logging::LogMessage(__FILE__, __LINE__, logging::LOG_WARNING) -#define COMPACT_GOOGLE_LOG_ERROR \ - logging::LogMessage(__FILE__, __LINE__, logging::LOG_ERROR) -#define COMPACT_GOOGLE_LOG_FATAL \ - logging::LogMessage(__FILE__, __LINE__, logging::LOG_FATAL) -#define COMPACT_GOOGLE_LOG_DFATAL \ - logging::LogMessage(__FILE__, __LINE__, logging::LOG_DFATAL_LEVEL) - -// wingdi.h defines ERROR to be 0. When we call LOG(ERROR), it gets -// substituted with 0, and it expands to COMPACT_GOOGLE_LOG_0. To allow us -// to keep using this syntax, we define this macro to do the same thing -// as COMPACT_GOOGLE_LOG_ERROR, and also define ERROR the same way that -// the Windows SDK does for consistency. -#define ERROR 0 -#define COMPACT_GOOGLE_LOG_0 \ - logging::LogMessage(__FILE__, __LINE__, logging::LOG_ERROR) - -// We use the preprocessor's merging operator, "##", so that, e.g., -// LOG(INFO) becomes the token COMPACT_GOOGLE_LOG_INFO. There's some funny -// subtle difference between ostream member streaming functions (e.g., -// ostream::operator<<(int) and ostream non-member streaming functions -// (e.g., ::operator<<(ostream&, string&): it turns out that it's -// impossible to stream something like a string directly to an unnamed -// ostream. We employ a neat hack by calling the stream() member -// function of LogMessage which seems to avoid the problem. - -#define LOG(severity) COMPACT_GOOGLE_LOG_ ## severity.stream() -#define SYSLOG(severity) LOG(severity) - -#define LOG_IF(severity, condition) \ - !(condition) ? (void) 0 : logging::LogMessageVoidify() & LOG(severity) -#define SYSLOG_IF(severity, condition) LOG_IF(severity, condition) - -#define LOG_ASSERT(condition) \ - LOG_IF(FATAL, !(condition)) << "Assert failed: " #condition ". " -#define SYSLOG_ASSERT(condition) \ - SYSLOG_IF(FATAL, !(condition)) << "Assert failed: " #condition ". " - -// A container for a string pointer which can be evaluated to a bool - -// true iff the pointer is NULL. -struct CheckOpString { - CheckOpString(std::string* str) : str_(str) { } - // No destructor: if str_ is non-NULL, we're about to LOG(FATAL), - // so there's no point in cleaning up str_. - operator bool() const { return str_ != NULL; } - std::string* str_; -}; - -// Build the error message string. This is separate from the "Impl" -// function template because it is not performance critical and so can -// be out of line, while the "Impl" code should be inline. -template -std::string* MakeCheckOpString(const t1& v1, const t2& v2, const char* names) { - std::ostrstream ss; - ss << names << " (" << v1 << " vs. " << v2 << ")"; - return new std::string(ss.str(), ss.pcount()); -} - -extern std::string* MakeCheckOpStringIntInt(int v1, int v2, const char* names); - -template -std::string* MakeCheckOpString(const int& v1, const int& v2, const char* names) { - return MakeCheckOpStringIntInt(v1, v2, names); -} - -// Plus some debug-logging macros that get compiled to nothing for production -// -// DEBUG_MODE is for uses like -// if (DEBUG_MODE) foo.CheckThatFoo(); -// instead of -// #ifndef NDEBUG -// foo.CheckThatFoo(); -// #endif - -#ifndef NDEBUG - -#define DLOG(severity) LOG(severity) -#define DLOG_IF(severity, condition) LOG_IF(severity, condition) -#define DLOG_ASSERT(condition) LOG_ASSERT(condition) - -// debug-only checking. not executed in NDEBUG mode. -enum { DEBUG_MODE = 1 }; -#define DCHECK(condition) \ - LOG_IF(FATAL, !(condition)) << "Check failed: " #condition ". " - -// Helper functions for DCHECK_OP macro. -// The (int, int) specialization works around the issue that the compiler -// will not instantiate the template version of the function on values of -// unnamed enum type - see comment below. -#define DEFINE_DCHECK_OP_IMPL(name, op) \ - template \ - inline std::string* Check##name##Impl(const t1& v1, const t2& v2, \ - const char* names) { \ - if (v1 op v2) return NULL; \ - else return MakeCheckOpString(v1, v2, names); \ - } \ - inline std::string* Check##name##Impl(int v1, int v2, const char* names) { \ - if (v1 op v2) return NULL; \ - else return MakeCheckOpString(v1, v2, names); \ - } -DEFINE_DCHECK_OP_IMPL(EQ, ==) -DEFINE_DCHECK_OP_IMPL(NE, !=) -DEFINE_DCHECK_OP_IMPL(LE, <=) -DEFINE_DCHECK_OP_IMPL(LT, < ) -DEFINE_DCHECK_OP_IMPL(GE, >=) -DEFINE_DCHECK_OP_IMPL(GT, > ) -#undef DEFINE_DCHECK_OP_IMPL - -// Helper macro for binary operators. -// Don't use this macro directly in your code, use CHECK_EQ et al below. -#define DCHECK_OP(name, op, val1, val2) \ - while (logging::CheckOpString _result = \ - logging::Check##name##Impl((val1), (val2), #val1 " " #op " " #val2)) \ - logging::LogMessage(__FILE__, __LINE__, _result).stream() - -// Equality/Inequality checks - compare two values, and log a LOG_FATAL message -// including the two values when the result is not as expected. The values -// must have operator<<(ostream, ...) defined. -// -// You may append to the error message like so: -// CHECK_NE(1, 2) << ": The world must be ending!"; -// -// We are very careful to ensure that each argument is evaluated exactly -// once, and that anything which is legal to pass as a function argument is -// legal here. In particular, the arguments may be temporary expressions -// which will end up being destroyed at the end of the apparent statement, -// for example: -// CHECK_EQ(string("abc")[1], 'b'); -// -// WARNING: These don't compile correctly if one of the arguments is a pointer -// and the other is NULL. To work around this, simply static_cast NULL to the -// type of the desired pointer. - -#define DCHECK_EQ(val1, val2) DCHECK_OP(EQ, ==, val1, val2) -#define DCHECK_NE(val1, val2) DCHECK_OP(NE, !=, val1, val2) -#define DCHECK_LE(val1, val2) DCHECK_OP(LE, <=, val1, val2) -#define DCHECK_LT(val1, val2) DCHECK_OP(LT, < , val1, val2) -#define DCHECK_GE(val1, val2) DCHECK_OP(GE, >=, val1, val2) -#define DCHECK_GT(val1, val2) DCHECK_OP(GT, > , val1, val2) - -// Helper functions for string comparisons. -// To avoid bloat, the definitions are in logging.cc. -#define DECLARE_DCHECK_STROP_IMPL(func, expected) \ - std::string* Check##func##expected##Impl(const char* s1, \ - const char* s2, \ - const char* names); -DECLARE_DCHECK_STROP_IMPL(strcmp, true) -DECLARE_DCHECK_STROP_IMPL(strcmp, false) -DECLARE_DCHECK_STROP_IMPL(_stricmp, true) -DECLARE_DCHECK_STROP_IMPL(_stricmp, false) -#undef DECLARE_DCHECK_STROP_IMPL - -// Helper macro for string comparisons. -// Don't use this macro directly in your code, use CHECK_STREQ et al below. -#define DCHECK_STROP(func, op, expected, s1, s2) \ - while (CheckOpString _result = \ - logging::Check##func##expected##Impl((s1), (s2), \ - #s1 " " #op " " #s2)) \ - LOG(FATAL) << *_result.str_ - -// String (char*) equality/inequality checks. -// CASE versions are case-insensitive. -// -// Note that "s1" and "s2" may be temporary strings which are destroyed -// by the compiler at the end of the current "full expression" -// (e.g. DCHECK_STREQ(Foo().c_str(), Bar().c_str())). - -#define DCHECK_STREQ(s1, s2) DCHECK_STROP(strcmp, ==, true, s1, s2) -#define DCHECK_STRNE(s1, s2) DCHECK_STROP(strcmp, !=, false, s1, s2) -#define DCHECK_STRCASEEQ(s1, s2) DCHECK_STROP(_stricmp, ==, true, s1, s2) -#define DCHECK_STRCASENE(s1, s2) DCHECK_STROP(_stricmp, !=, false, s1, s2) - -#define DCHECK_INDEX(I,A) DCHECK(I < (sizeof(A)/sizeof(A[0]))) -#define DCHECK_BOUND(B,A) DCHECK(B <= (sizeof(A)/sizeof(A[0]))) - -#else // NDEBUG - -#define DLOG(severity) \ - true ? (void) 0 : logging::LogMessageVoidify() & LOG(severity) - -#define DLOG_IF(severity, condition) \ - true ? (void) 0 : logging::LogMessageVoidify() & LOG(severity) - -#define DLOG_ASSERT(condition) \ - true ? (void) 0 : LOG_ASSERT(condition) - -enum { DEBUG_MODE = 0 }; - -// This macro can be followed by a sequence of stream parameters in -// non-debug mode. The DCHECK and friends macros use this so that -// the expanded expression DCHECK(foo) << "asdf" is still syntactically -// valid, even though the expression will get optimized away. -#define NDEBUG_EAT_STREAM_PARAMETERS \ - logging::LogMessage(__FILE__, __LINE__).stream() - -#define DCHECK(condition) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_EQ(val1, val2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_NE(val1, val2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_LE(val1, val2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_LT(val1, val2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_GE(val1, val2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_GT(val1, val2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_STREQ(str1, str2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_STRCASEEQ(str1, str2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_STRNE(str1, str2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#define DCHECK_STRCASENE(str1, str2) \ - while (false) NDEBUG_EAT_STREAM_PARAMETERS - -#endif // NDEBUG - -#define NOTREACHED() DCHECK(false) - -// Redefine the standard assert to use our nice log files -#undef assert -#define assert(x) DLOG_ASSERT(x) - -// This class more or less represents a particular log message. You -// create an instance of LogMessage and then stream stuff to it. -// When you finish streaming to it, ~LogMessage is called and the -// full message gets streamed to the appropriate destination. -// -// You shouldn't actually use LogMessage's constructor to log things, -// though. You should use the LOG() macro (and variants thereof) -// above. -class LogMessage { - public: - LogMessage(const char* file, int line, LogSeverity severity, int ctr); - - // Two special constructors that generate reduced amounts of code at - // LOG call sites for common cases. - // - // Used for LOG(INFO): Implied are: - // severity = LOG_INFO, ctr = 0 - // - // Using this constructor instead of the more complex constructor above - // saves a couple of bytes per call site. - LogMessage(const char* file, int line); - - // Used for LOG(severity) where severity != INFO. Implied - // are: ctr = 0 - // - // Using this constructor instead of the more complex constructor above - // saves a couple of bytes per call site. - LogMessage(const char* file, int line, LogSeverity severity); - - // A special constructor used for check failures. - // Implied severity = LOG_FATAL - LogMessage(const char* file, int line, const CheckOpString& result); - - ~LogMessage(); - - std::ostream& stream() { return stream_; } - - private: - void Init(const char* file, int line); - - LogSeverity severity_; - std::ostrstream stream_; - int message_start_; // offset of the start of the message (past prefix info). - - DISALLOW_EVIL_CONSTRUCTORS(LogMessage); -}; - -// A non-macro interface to the log facility; (useful -// when the logging level is not a compile-time constant). -inline void LogAtLevel(int const log_level, std::string const &msg) { - LogMessage(__FILE__, __LINE__, log_level).stream() << msg; -} - -// This class is used to explicitly ignore values in the conditional -// logging macros. This avoids compiler warnings like "value computed -// is not used" and "statement has no effect". -class LogMessageVoidify { - public: - LogMessageVoidify() { } - // This has to be an operator with a precedence lower than << but - // higher than ?: - void operator&(std::ostream&) { } -}; - -// Closes the log file explicitly if open. -// NOTE: Since the log file is opened as necessary by the action of logging -// statements, there's no guarantee that it will stay closed -// after this call. -void CloseLogFile(); - -} // namespace Logging - -// These functions are provided as a convenience for logging, which is where we -// use streams (it is against Google style to use streams in other places). It -// is designed to allow you to emit non-ASCII Unicode strings to the log file, -// which is normally ASCII. It is relatively slow, so try not to use it for -// common cases. Non-ASCII characters will be converted to UTF-8 by these operators. -std::ostream& operator<<(std::ostream& out, const wchar_t* wstr); -inline std::ostream& operator<<(std::ostream& out, const std::wstring& wstr) { - return out << wstr.c_str(); -} - -#endif // BASE_LOGGING_H__ diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/scoped_ptr.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/scoped_ptr.h.svn-base deleted file mode 100644 index de0b388cb..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/scoped_ptr.h.svn-base +++ /dev/null @@ -1,322 +0,0 @@ -#ifndef BASE_SCOPED_PTR_H -#define BASE_SCOPED_PTR_H - -// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999. -// Copyright (c) 2001, 2002 Peter Dimov -// -// Permission to copy, use, modify, sell and distribute this software -// is granted provided this copyright notice appears in all copies. -// This software is provided "as is" without express or implied -// warranty, and with no claim as to its suitability for any purpose. -// -// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation. -// - -// scoped_ptr mimics a built-in pointer except that it guarantees deletion -// of the object pointed to, either on destruction of the scoped_ptr or via -// an explicit reset(). scoped_ptr is a simple solution for simple needs; -// use shared_ptr or std::auto_ptr if your needs are more complex. - -// *** NOTE *** -// If your scoped_ptr is a class member of class FOO pointing to a -// forward declared type BAR (as shown below), then you MUST use a non-inlined -// version of the destructor. The destructor of a scoped_ptr (called from -// FOO's destructor) must have a complete definition of BAR in order to -// destroy it. Example: -// -// -- foo.h -- -// class BAR; -// -// class FOO { -// public: -// FOO(); -// ~FOO(); // Required for sources that instantiate class FOO to compile! -// -// private: -// scoped_ptr bar_; -// }; -// -// -- foo.cc -- -// #include "foo.h" -// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition. - -#include // for std::ptrdiff_t -#include // for assert -#include // for free() decl - -template -class scoped_ptr { - private: - - T* ptr; - - scoped_ptr(scoped_ptr const &); - scoped_ptr & operator=(scoped_ptr const &); - - public: - - typedef T element_type; - - explicit scoped_ptr(T* p = 0): ptr(p) {} - - ~scoped_ptr() { - typedef char type_must_be_complete[sizeof(T)]; - delete ptr; - } - - void reset(T* p = 0) { - typedef char type_must_be_complete[sizeof(T)]; - - if (ptr != p) { - delete ptr; - ptr = p; - } - } - - T& operator*() const { - assert(ptr != 0); - return *ptr; - } - - T* operator->() const { - assert(ptr != 0); - return ptr; - } - - bool operator==(T* p) const { - return ptr == p; - } - - bool operator!=(T* p) const { - return ptr != p; - } - - T* get() const { - return ptr; - } - - void swap(scoped_ptr & b) { - T* tmp = b.ptr; - b.ptr = ptr; - ptr = tmp; - } - - T* release() { - T* tmp = ptr; - ptr = 0; - return tmp; - } - - private: - - // no reason to use these: each scoped_ptr should have its own object - template bool operator==(scoped_ptr const& p) const; - template bool operator!=(scoped_ptr const& p) const; -}; - -template inline -void swap(scoped_ptr& a, scoped_ptr& b) { - a.swap(b); -} - -template inline -bool operator==(T* p, const scoped_ptr& b) { - return p == b.get(); -} - -template inline -bool operator!=(T* p, const scoped_ptr& b) { - return p != b.get(); -} - -// scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to -// is guaranteed, either on destruction of the scoped_array or via an explicit -// reset(). Use shared_array or std::vector if your needs are more complex. - -template -class scoped_array { - private: - - T* ptr; - - scoped_array(scoped_array const &); - scoped_array & operator=(scoped_array const &); - - public: - - typedef T element_type; - - explicit scoped_array(T* p = 0) : ptr(p) {} - - ~scoped_array() { - typedef char type_must_be_complete[sizeof(T)]; - delete[] ptr; - } - - void reset(T* p = 0) { - typedef char type_must_be_complete[sizeof(T)]; - - if (ptr != p) { - delete [] ptr; - ptr = p; - } - } - - T& operator[](std::ptrdiff_t i) const { - assert(ptr != 0); - assert(i >= 0); - return ptr[i]; - } - - bool operator==(T* p) const { - return ptr == p; - } - - bool operator!=(T* p) const { - return ptr != p; - } - - T* get() const { - return ptr; - } - - void swap(scoped_array & b) { - T* tmp = b.ptr; - b.ptr = ptr; - ptr = tmp; - } - - T* release() { - T* tmp = ptr; - ptr = 0; - return tmp; - } - - private: - - // no reason to use these: each scoped_array should have its own object - template bool operator==(scoped_array const& p) const; - template bool operator!=(scoped_array const& p) const; -}; - -template inline -void swap(::scoped_array& a, ::scoped_array& b) { - a.swap(b); -} - -template inline -bool operator==(T* p, const ::scoped_array& b) { - return p == b.get(); -} - -template inline -bool operator!=(T* p, const ::scoped_array& b) { - return p != b.get(); -} - - -// This class wraps the c library function free() in a class that can be -// passed as a template argument to scoped_ptr_malloc below. -class ScopedPtrMallocFree { - public: - inline void operator()(void* x) const { - free(x); - } -}; - -// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a -// second template argument, the functor used to free the object. - -template -class scoped_ptr_malloc { - private: - - T* ptr; - - scoped_ptr_malloc(scoped_ptr_malloc const &); - scoped_ptr_malloc & operator=(scoped_ptr_malloc const &); - - public: - - typedef T element_type; - - explicit scoped_ptr_malloc(T* p = 0): ptr(p) {} - - ~scoped_ptr_malloc() { - typedef char type_must_be_complete[sizeof(T)]; - free_((void*) ptr); - } - - void reset(T* p = 0) { - typedef char type_must_be_complete[sizeof(T)]; - - if (ptr != p) { - free_((void*) ptr); - ptr = p; - } - } - - T& operator*() const { - assert(ptr != 0); - return *ptr; - } - - T* operator->() const { - assert(ptr != 0); - return ptr; - } - - bool operator==(T* p) const { - return ptr == p; - } - - bool operator!=(T* p) const { - return ptr != p; - } - - T* get() const { - return ptr; - } - - void swap(scoped_ptr_malloc & b) { - T* tmp = b.ptr; - b.ptr = ptr; - ptr = tmp; - } - - T* release() { - T* tmp = ptr; - ptr = 0; - return tmp; - } - - private: - - // no reason to use these: each scoped_ptr_malloc should have its own object - template - bool operator==(scoped_ptr_malloc const& p) const; - template - bool operator!=(scoped_ptr_malloc const& p) const; - - static FreeProc const free_; -}; - -template -FP const scoped_ptr_malloc::free_ = FP(); - -template inline -void swap(scoped_ptr_malloc& a, scoped_ptr_malloc& b) { - a.swap(b); -} - -template inline -bool operator==(T* p, const scoped_ptr_malloc& b) { - return p == b.get(); -} - -template inline -bool operator!=(T* p, const scoped_ptr_malloc& b) { - return p != b.get(); -} - -#endif // #ifndef BASE_SCOPED_PTR_H diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/string16.cc.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/string16.cc.svn-base deleted file mode 100644 index eee3e7732..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/string16.cc.svn-base +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "base/string16.h" - -#include - -#ifdef WIN32 - -#error This file should not be used on 2-byte wchar_t systems -// If this winds up being needed on 2-byte wchar_t systems, either the -// definitions below can be used, or the host system's wide character -// functions like wmemcmp can be wrapped. - -#else // !WIN32 - -namespace base { - -int c16memcmp(const char16* s1, const char16* s2, size_t n) { - // We cannot call memcmp because that changes the semantics. - while (n-- > 0) { - if (*s1 != *s2) { - // We cannot use (*s1 - *s2) because char16 is unsigned. - return ((*s1 < *s2) ? -1 : 1); - } - ++s1; - ++s2; - } - return 0; -} - -size_t c16len(const char16* s) { - const char16 *s_orig = s; - while (*s) { - ++s; - } - return s - s_orig; -} - -const char16* c16memchr(const char16* s, char16 c, size_t n) { - while (n-- > 0) { - if (*s == c) { - return s; - } - ++s; - } - return 0; -} - -char16* c16memmove(char16* s1, const char16* s2, size_t n) { - return reinterpret_cast(memmove(s1, s2, n * sizeof(char16))); -} - -char16* c16memcpy(char16* s1, const char16* s2, size_t n) { - return reinterpret_cast(memcpy(s1, s2, n * sizeof(char16))); -} - -char16* c16memset(char16* s, char16 c, size_t n) { - char16 *s_orig = s; - while (n-- > 0) { - *s = c; - ++s; - } - return s_orig; -} - -} // namespace base - -template class std::basic_string; - -#endif // WIN32 diff --git a/ePub3/ThirdParty/google-url/base/.svn/text-base/string16.h.svn-base b/ePub3/ThirdParty/google-url/base/.svn/text-base/string16.h.svn-base deleted file mode 100644 index 9e0fd1de6..000000000 --- a/ePub3/ThirdParty/google-url/base/.svn/text-base/string16.h.svn-base +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef BASE_STRING16_H_ -#define BASE_STRING16_H_ - -// WHAT: -// A version of std::basic_string that provides 2-byte characters even when -// wchar_t is not implemented as a 2-byte type. You can access this class as -// string16. We also define char16, which string16 is based upon. -// -// WHY: -// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 -// data. Plenty of existing code operates on strings encoded as UTF-16. -// -// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make -// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails -// at run time, because it calls some functions (like wcslen) that come from -// the system's native C library -- which was built with a 4-byte wchar_t! -// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's -// entirely improper on those systems where the encoding of wchar_t is defined -// as UTF-32. -// -// Here, we define string16, which is similar to std::wstring but replaces all -// libc functions with custom, 2-byte-char compatible routines. It is capable -// of carrying UTF-16-encoded data. - -#include - -#include "base/basictypes.h" - -#ifdef WIN32 - -typedef wchar_t char16; -typedef std::wstring string16; - -#else // !WIN32 - -typedef uint16 char16; - -namespace base { - -// char16 versions of the functions required by string16_char_traits; these -// are based on the wide character functions of similar names ("w" or "wcs" -// instead of "c16"). -int c16memcmp(const char16* s1, const char16* s2, size_t n); -size_t c16len(const char16* s); -const char16* c16memchr(const char16* s, char16 c, size_t n); -char16* c16memmove(char16* s1, const char16* s2, size_t n); -char16* c16memcpy(char16* s1, const char16* s2, size_t n); -char16* c16memset(char16* s, char16 c, size_t n); - -struct string16_char_traits { - typedef char16 char_type; - typedef int int_type; - - typedef std::streamoff off_type; - typedef mbstate_t state_type; - typedef std::fpos pos_type; - - static void assign(char_type& c1, const char_type& c2) { - c1 = c2; - } - - static bool eq(const char_type& c1, const char_type& c2) { - return c1 == c2; - } - static bool lt(const char_type& c1, const char_type& c2) { - return c1 < c2; - } - - static int compare(const char_type* s1, const char_type* s2, size_t n) { - return c16memcmp(s1, s2, n); - } - - static size_t length(const char_type* s) { - return c16len(s); - } - - static const char_type* find(const char_type* s, size_t n, - const char_type& a) { - return c16memchr(s, a, n); - } - - static char_type* move(char_type* s1, const char_type* s2, int_type n) { - return c16memmove(s1, s2, n); - } - - static char_type* copy(char_type* s1, const char_type* s2, size_t n) { - return c16memcpy(s1, s2, n); - } - - static char_type* assign(char_type* s, size_t n, char_type a) { - return c16memset(s, a, n); - } - - static int_type not_eof(const int_type& c) { - return eq_int_type(c, eof()) ? 0 : c; - } - - static char_type to_char_type(const int_type& c) { - return char_type(c); - } - - static int_type to_int_type(const char_type& c) { - return int_type(c); - } - - static bool eq_int_type(const int_type& c1, const int_type& c2) { - return c1 == c2; - } - - static int_type eof() { - return static_cast(EOF); - } -}; - -} // namespace base - -// The string class will be explicitly instantiated only once, in string16.cc. -// -// std::basic_string<> in GNU libstdc++ contains a static data member, -// _S_empty_rep_storage, to represent empty strings. When an operation such -// as assignment or destruction is performed on a string, causing its existing -// data member to be invalidated, it must not be freed if this static data -// member is being used. Otherwise, it counts as an attempt to free static -// (and not allocated) data, which is a memory error. -// -// Generally, due to C++ template magic, _S_empty_rep_storage will be marked -// as a coalesced symbol, meaning that the linker will combine multiple -// instances into a single one when generating output. -// -// If a string class is used by multiple shared libraries, a problem occurs. -// Each library will get its own copy of _S_empty_rep_storage. When strings -// are passed across a library boundary for alteration or destruction, memory -// errors will result. GNU libstdc++ contains a configuration option, -// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which -// disables the static data member optimization, but it's a good optimization -// and non-STL code is generally at the mercy of the system's STL -// configuration. Fully-dynamic strings are not the default for GNU libstdc++ -// libstdc++ itself or for the libstdc++ installations on the systems we care -// about, such as Mac OS X and relevant flavors of Linux. -// -// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . -// -// To avoid problems, string classes need to be explicitly instantiated only -// once, in exactly one library. All other string users see it via an "extern" -// declaration. This is precisely how GNU libstdc++ handles -// std::basic_string (string) and std::basic_string (wstring). -// -// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), -// in which the linker does not fully coalesce symbols when dead code -// stripping is enabled. This bug causes the memory errors described above -// to occur even when a std::basic_string<> does not cross shared library -// boundaries, such as in statically-linked executables. -// -// TODO(mark): File this bug with Apple and update this note with a bug number. - -extern template class std::basic_string; - -typedef std::basic_string string16; - -extern std::ostream& operator<<(std::ostream& out, const string16& str); - -#endif // !WIN32 - -#endif // BASE_STRING16_H_ diff --git a/ePub3/ThirdParty/google-url/src/.svn/all-wcprops b/ePub3/ThirdParty/google-url/src/.svn/all-wcprops deleted file mode 100644 index 0a9c142e2..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/all-wcprops +++ /dev/null @@ -1,221 +0,0 @@ -K 25 -svn:wc:ra_dav:version-url -V 27 -/svn/!svn/ver/181/trunk/src -END -url_canon_path.cc -K 25 -svn:wc:ra_dav:version-url -V 45 -/svn/!svn/ver/174/trunk/src/url_canon_path.cc -END -url_util_unittest.cc -K 25 -svn:wc:ra_dav:version-url -V 48 -/svn/!svn/ver/181/trunk/src/url_util_unittest.cc -END -url_parse.h -K 25 -svn:wc:ra_dav:version-url -V 39 -/svn/!svn/ver/166/trunk/src/url_parse.h -END -url_canon_fileurl.cc -K 25 -svn:wc:ra_dav:version-url -V 48 -/svn/!svn/ver/100/trunk/src/url_canon_fileurl.cc -END -url_file.h -K 25 -svn:wc:ra_dav:version-url -V 38 -/svn/!svn/ver/100/trunk/src/url_file.h -END -url_canon_query.cc -K 25 -svn:wc:ra_dav:version-url -V 46 -/svn/!svn/ver/100/trunk/src/url_canon_query.cc -END -url_canon_filesystemurl.cc -K 25 -svn:wc:ra_dav:version-url -V 54 -/svn/!svn/ver/168/trunk/src/url_canon_filesystemurl.cc -END -url_canon_ip.cc -K 25 -svn:wc:ra_dav:version-url -V 43 -/svn/!svn/ver/179/trunk/src/url_canon_ip.cc -END -gurl_unittest.cc -K 25 -svn:wc:ra_dav:version-url -V 44 -/svn/!svn/ver/181/trunk/src/gurl_unittest.cc -END -url_canon_relative.cc -K 25 -svn:wc:ra_dav:version-url -V 49 -/svn/!svn/ver/166/trunk/src/url_canon_relative.cc -END -url_common.h -K 25 -svn:wc:ra_dav:version-url -V 40 -/svn/!svn/ver/154/trunk/src/url_common.h -END -url_canon_host.cc -K 25 -svn:wc:ra_dav:version-url -V 45 -/svn/!svn/ver/118/trunk/src/url_canon_host.cc -END -url_canon_internal.h -K 25 -svn:wc:ra_dav:version-url -V 48 -/svn/!svn/ver/162/trunk/src/url_canon_internal.h -END -url_canon_pathurl.cc -K 25 -svn:wc:ra_dav:version-url -V 48 -/svn/!svn/ver/100/trunk/src/url_canon_pathurl.cc -END -url_canon_unittest.cc -K 25 -svn:wc:ra_dav:version-url -V 49 -/svn/!svn/ver/181/trunk/src/url_canon_unittest.cc -END -url_canon_icu.h -K 25 -svn:wc:ra_dav:version-url -V 43 -/svn/!svn/ver/162/trunk/src/url_canon_icu.h -END -url_parse_internal.h -K 25 -svn:wc:ra_dav:version-url -V 48 -/svn/!svn/ver/100/trunk/src/url_parse_internal.h -END -url_parse_file.cc -K 25 -svn:wc:ra_dav:version-url -V 45 -/svn/!svn/ver/100/trunk/src/url_parse_file.cc -END -url_parse_unittest.cc -K 25 -svn:wc:ra_dav:version-url -V 49 -/svn/!svn/ver/173/trunk/src/url_parse_unittest.cc -END -url_parse.cc -K 25 -svn:wc:ra_dav:version-url -V 40 -/svn/!svn/ver/169/trunk/src/url_parse.cc -END -url_util_internal.h -K 25 -svn:wc:ra_dav:version-url -V 47 -/svn/!svn/ver/166/trunk/src/url_util_internal.h -END -url_util.cc -K 25 -svn:wc:ra_dav:version-url -V 39 -/svn/!svn/ver/181/trunk/src/url_util.cc -END -url_util.h -K 25 -svn:wc:ra_dav:version-url -V 38 -/svn/!svn/ver/157/trunk/src/url_util.h -END -url_canon_mailtourl.cc -K 25 -svn:wc:ra_dav:version-url -V 50 -/svn/!svn/ver/100/trunk/src/url_canon_mailtourl.cc -END -gurl.cc -K 25 -svn:wc:ra_dav:version-url -V 35 -/svn/!svn/ver/173/trunk/src/gurl.cc -END -url_canon_ip.h -K 25 -svn:wc:ra_dav:version-url -V 42 -/svn/!svn/ver/179/trunk/src/url_canon_ip.h -END -gurl.h -K 25 -svn:wc:ra_dav:version-url -V 34 -/svn/!svn/ver/180/trunk/src/gurl.h -END -url_canon_etc.cc -K 25 -svn:wc:ra_dav:version-url -V 44 -/svn/!svn/ver/146/trunk/src/url_canon_etc.cc -END -url_canon_internal.cc -K 25 -svn:wc:ra_dav:version-url -V 49 -/svn/!svn/ver/157/trunk/src/url_canon_internal.cc -END -gurl_test_main.cc -K 25 -svn:wc:ra_dav:version-url -V 45 -/svn/!svn/ver/153/trunk/src/gurl_test_main.cc -END -url_canon_stdurl.cc -K 25 -svn:wc:ra_dav:version-url -V 47 -/svn/!svn/ver/123/trunk/src/url_canon_stdurl.cc -END -url_canon_internal_file.h -K 25 -svn:wc:ra_dav:version-url -V 52 -/svn/!svn/ver/97/trunk/src/url_canon_internal_file.h -END -url_canon_icu.cc -K 25 -svn:wc:ra_dav:version-url -V 44 -/svn/!svn/ver/162/trunk/src/url_canon_icu.cc -END -url_canon.h -K 25 -svn:wc:ra_dav:version-url -V 39 -/svn/!svn/ver/175/trunk/src/url_canon.h -END -url_test_utils.h -K 25 -svn:wc:ra_dav:version-url -V 44 -/svn/!svn/ver/144/trunk/src/url_test_utils.h -END -url_canon_stdstring.h -K 25 -svn:wc:ra_dav:version-url -V 49 -/svn/!svn/ver/158/trunk/src/url_canon_stdstring.h -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/entries b/ePub3/ThirdParty/google-url/src/.svn/entries deleted file mode 100644 index 6c5bcff32..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/entries +++ /dev/null @@ -1,1252 +0,0 @@ -10 - -dir -181 -http://google-url.googlecode.com/svn/trunk/src -http://google-url.googlecode.com/svn - - - -2012-12-20T21:49:49.616818Z -181 -brettw - - - - - - - - - - - - - - -8873c55e-713a-0410-88f8-23d9c3d90b1b - -url_canon_path.cc -file - - - - -2013-01-16T16:12:51.000000Z -d72152a76b3f1d0cb6420e246f009a1b -2012-05-09T17:45:26.197825Z -174 -ericu@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -17812 - -url_util_unittest.cc -file - - - - -2013-01-16T16:12:51.000000Z -13044c472f3694e90f51e2d521fdfd69 -2012-12-20T21:49:49.616818Z -181 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -13593 - -url_parse.h -file - - - - -2013-01-16T16:12:51.000000Z -89b3dd68af0cb7556d2199319c36071d -2012-01-04T22:47:15.449237Z -166 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -15514 - -url_canon_fileurl.cc -file - - - - -2013-01-16T16:12:51.000000Z -fbccbaf1d11e4b910e4d0aa2588a1877 -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -8961 - -url_file.h -file - - - - -2013-01-16T16:12:51.000000Z -8bdf31a548fd2d5e3aa4e226245ff0ef -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -4315 - -url_canon_query.cc -file - - - - -2013-01-16T16:12:51.000000Z -068272a2607cb7ffd4cc92e74b2385fa -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -8194 - -url_canon_filesystemurl.cc -file - - - - -2013-01-16T16:12:51.000000Z -9bd23f9765feec4c19975954a371cf69 -2012-01-19T02:40:02.747504Z -168 -eroman@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -7038 - -url_canon_ip.cc -file - - - - -2013-01-16T16:12:51.000000Z -3730b3b896f7f22616e540a33d950553 -2012-09-06T20:56:41.093102Z -179 -eroman@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -26355 - -gurl_unittest.cc -file - - - - -2013-01-16T16:12:51.000000Z -47f5254a1dfedd9aba2b58176edc1b26 -2012-12-20T21:49:49.616818Z -181 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -17509 - -url_canon_relative.cc -file - - - - -2013-01-16T16:12:51.000000Z -91828007ca980089be1de5e288b86aec -2012-01-04T22:47:15.449237Z -166 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -24840 - -url_common.h -file - - - - -2013-01-16T16:12:51.000000Z -340bc1b83c3f5d964d629f46618c2c76 -2011-04-26T15:58:09.056442Z -154 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -2036 - -url_canon_host.cc -file - - - - -2013-01-16T16:12:51.000000Z -bcd2aa0e0d5fd67773e77050bfcae56e -2009-09-23T17:12:43.915608Z -118 -brettw@gmail.com -has-props - - - - - - - - - - - - - - - - - - - - -17765 - -url_canon_internal.h -file - - - - -2013-01-16T16:12:51.000000Z -bfe71c53e4f5b22d48304b3076a2f2e0 -2011-11-10T19:55:36.315759Z -162 -eroman@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -20149 - -url_canon_pathurl.cc -file - - - - -2013-01-16T16:12:51.000000Z -ebcb859fc9d19963da99b3c4f5b3fa65 -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -5218 - -url_canon_unittest.cc -file - - - - -2013-01-16T16:12:51.000000Z -923fe50803947f07248cd9b0cbdd39ac -2012-12-20T21:49:49.616818Z -181 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -109321 - -url_canon_icu.h -file - - - - -2013-01-16T16:12:51.000000Z -d7fb07b76a4308027ed473f67bbf862a -2011-11-10T19:55:36.315759Z -162 -eroman@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -2632 - -url_parse_internal.h -file - - - - -2013-01-16T16:12:51.000000Z -41b7f8ebb65e9198786385c9d1b9358d -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -4774 - -url_parse_file.cc -file - - - - -2013-01-16T16:12:51.000000Z -79c571f7599fdc4b449309ccbdfe18eb -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -9947 - -url_parse_unittest.cc -file - - - - -2013-01-16T16:12:51.000000Z -90b6052f30cc234fb43d05ef7c63accb -2012-04-24T18:19:57.685062Z -173 -ericu@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -32192 - -url_parse.cc -file - - - - -2013-01-16T16:12:51.000000Z -15ed713945aa41e45ccf8a4cc80873ea -2012-03-26T19:44:24.470546Z -169 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -30154 - -url_util_internal.h -file - - - - -2013-01-16T16:12:51.000000Z -4e555fb00f45d75856ca2270e36399da -2012-01-04T22:47:15.449237Z -166 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -2415 - -url_util.cc -file - - - - -2013-01-16T16:12:51.000000Z -05b25d325e2eab4a151fcc55f900b5c1 -2012-12-20T21:49:49.616818Z -181 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -26047 - -url_util.h -file - - - - -2013-01-16T16:12:51.000000Z -ede47fcb577351ea7d1d12d37c75eca9 -2011-06-16T18:02:23.056453Z -157 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -11024 - -url_canon_mailtourl.cc -file - - - - -2013-01-16T16:12:51.000000Z -045d98ff23025693f2526ee8e132dbc0 -2009-03-25T19:39:03.613161Z -100 -mark@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -5373 - -gurl.cc -file - - - - -2013-01-16T16:12:51.000000Z -f681893aa6ca3642b0e7d8f43feebac8 -2012-04-24T18:19:57.685062Z -173 -ericu@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -17997 - -url_canon_ip.h -file - - - - -2013-01-16T16:12:51.000000Z -f5e93cf2a9494ccc77a1d9c2a5ac0f10 -2012-09-06T20:56:41.093102Z -179 -eroman@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -5238 - -gurl.h -file - - - - -2013-01-16T16:12:51.000000Z -52fa929600815ab8a700fc61946eaf3e -2012-10-12T19:49:50.864195Z -180 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -16601 - -url_canon_etc.cc -file - - - - -2013-01-16T16:12:51.000000Z -7058316648200c9142a461c75010513b -2010-09-21T20:05:13.230552Z -146 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -15791 - -url_canon_internal.cc -file - - - - -2013-01-16T16:12:51.000000Z -aa69a2a998f25e254db5bd82aa74d676 -2011-06-16T18:02:23.056453Z -157 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -19296 - -gurl_test_main.cc -file - - - - -2013-01-16T16:12:51.000000Z -581fc906e8a85beb83f4ed8a9d757a86 -2011-03-08T21:44:36.013257Z -153 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -3594 - -url_canon_internal_file.h -file - - - - -2013-01-16T16:12:51.000000Z -8306866565e4a442aaf76d0a2349e550 -2009-03-05T17:52:33.119165Z -97 -maruel@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -7045 - -url_canon_stdurl.cc -file - - - - -2013-01-16T16:12:51.000000Z -afe2b43c821f923bf56b9719ffdd76e1 -2010-02-12T21:20:26.633135Z -123 -brettw@gmail.com -has-props - - - - - - - - - - - - - - - - - - - - -8635 - -url_canon_icu.cc -file - - - - -2013-01-16T16:12:51.000000Z -bb134d8aeabf6a05b6724ed7f9a7fb48 -2011-11-10T19:55:36.315759Z -162 -eroman@chromium.org -has-props - - - - - - - - - - - - - - - - - - - - -7846 - -url_canon.h -file - - - - -2013-01-16T16:12:51.000000Z -d63102f1e71a183984e7d9820cc4e135 -2012-05-11T21:24:19.040939Z -175 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -40969 - -url_test_utils.h -file - - - - -2013-01-16T16:12:51.000000Z -528bfa6becbea89fdf06a9dd95213753 -2010-07-23T18:30:07.099800Z -144 -brettw@gmail.com -has-props - - - - - - - - - - - - - - - - - - - - -3112 - -url_canon_stdstring.h -file - - - - -2013-01-16T16:12:51.000000Z -a6d0ba800749c28125f2a91381003c3d -2011-06-27T18:11:15.574529Z -158 -brettw -has-props - - - - - - - - - - - - - - - - - - - - -5107 - diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_test_main.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_test_main.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_test_main.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_unittest.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/gurl_unittest.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_etc.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_etc.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_etc.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_filesystemurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_filesystemurl.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_filesystemurl.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_fileurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_fileurl.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_fileurl.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_host.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_host.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_host.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_icu.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal_file.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal_file.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_internal_file.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_ip.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_mailtourl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_mailtourl.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_mailtourl.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_path.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_path.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_path.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_pathurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_pathurl.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_pathurl.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_query.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_query.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_query.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_relative.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_relative.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_relative.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdstring.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdstring.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdstring.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdurl.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_stdurl.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_unittest.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_canon_unittest.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_common.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_common.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_common.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_file.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_file.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_file.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_file.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_file.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_file.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_internal.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_internal.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_internal.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_unittest.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_parse_unittest.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_test_utils.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_test_utils.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_test_utils.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_internal.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_internal.h.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_internal.h.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_unittest.cc.svn-base deleted file mode 100644 index abd5821e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/prop-base/url_util_unittest.cc.svn-base +++ /dev/null @@ -1,5 +0,0 @@ -K 13 -svn:eol-style -V 2 -LF -END diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.cc.svn-base deleted file mode 100644 index 0b0fb477c..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.cc.svn-base +++ /dev/null @@ -1,529 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifdef WIN32 -#include -#else -#include -#endif - -#include -#include - -#include "googleurl/src/gurl.h" - -#include "base/logging.h" -#include "googleurl/src/url_canon_stdstring.h" -#include "googleurl/src/url_util.h" - -namespace { - -// External template that can handle initialization of either character type. -// The input spec is given, and the canonical version will be placed in -// |*canonical|, along with the parsing of the canonical spec in |*parsed|. -template -bool InitCanonical(const STR& input_spec, - std::string* canonical, - url_parse::Parsed* parsed) { - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - canonical->reserve(input_spec.size() + 32); - url_canon::StdStringCanonOutput output(canonical); - bool success = url_util::Canonicalize( - input_spec.data(), static_cast(input_spec.length()), - NULL, &output, parsed); - - output.Complete(); // Must be done before using string. - return success; -} - -static std::string* empty_string = NULL; -static GURL* empty_gurl = NULL; - -#ifdef WIN32 - -// Returns a static reference to an empty string for returning a reference -// when there is no underlying string. -const std::string& EmptyStringForGURL() { - // Avoid static object construction/destruction on startup/shutdown. - if (!empty_string) { - // Create the string. Be careful that we don't break in the case that this - // is being called from multiple threads. Statics are not threadsafe. - std::string* new_empty_string = new std::string; - if (InterlockedCompareExchangePointer( - reinterpret_cast(&empty_string), new_empty_string, NULL)) { - // The old value was non-NULL, so no replacement was done. Another - // thread did the initialization out from under us. - delete new_empty_string; - } - } - return *empty_string; -} - -#else - -static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT; -static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT; - -void EmptyStringForGURLOnce(void) { - empty_string = new std::string; -} - -const std::string& EmptyStringForGURL() { - // Avoid static object construction/destruction on startup/shutdown. - pthread_once(&empty_string_once, EmptyStringForGURLOnce); - return *empty_string; -} - -#endif // WIN32 - -} // namespace - -GURL::GURL() : is_valid_(false), inner_url_(NULL) { -} - -GURL::GURL(const GURL& other) - : spec_(other.spec_), - is_valid_(other.is_valid_), - parsed_(other.parsed_), - inner_url_(NULL) { - if (other.inner_url_) - inner_url_ = new GURL(*other.inner_url_); - // Valid filesystem urls should always have an inner_url_. - DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); -} - -GURL::GURL(const std::string& url_string) : inner_url_(NULL) { - is_valid_ = InitCanonical(url_string, &spec_, &parsed_); - if (is_valid_ && SchemeIsFileSystem()) { - inner_url_ = - new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true); - } -} - -GURL::GURL(const string16& url_string) : inner_url_(NULL) { - is_valid_ = InitCanonical(url_string, &spec_, &parsed_); - if (is_valid_ && SchemeIsFileSystem()) { - inner_url_ = - new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true); - } -} - -GURL::GURL(const char* canonical_spec, size_t canonical_spec_len, - const url_parse::Parsed& parsed, bool is_valid) - : spec_(canonical_spec, canonical_spec_len), - is_valid_(is_valid), - parsed_(parsed), - inner_url_(NULL) { - if (is_valid_ && SchemeIsFileSystem()) { - inner_url_ = - new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true); - } - -#ifndef NDEBUG - // For testing purposes, check that the parsed canonical URL is identical to - // what we would have produced. Skip checking for invalid URLs have no meaning - // and we can't always canonicalize then reproducabely. - if (is_valid_) { - url_parse::Component scheme; - if (!url_util::FindAndCompareScheme(canonical_spec, canonical_spec_len, - "filesystem", &scheme) || - scheme.begin == parsed.scheme.begin) { - // We can't do this check on the inner_url of a filesystem URL, as - // canonical_spec actually points to the start of the outer URL, so we'd - // end up with infinite recursion in this constructor. - GURL test_url(spec_); - - DCHECK(test_url.is_valid_ == is_valid_); - DCHECK(test_url.spec_ == spec_); - - DCHECK(test_url.parsed_.scheme == parsed_.scheme); - DCHECK(test_url.parsed_.username == parsed_.username); - DCHECK(test_url.parsed_.password == parsed_.password); - DCHECK(test_url.parsed_.host == parsed_.host); - DCHECK(test_url.parsed_.port == parsed_.port); - DCHECK(test_url.parsed_.path == parsed_.path); - DCHECK(test_url.parsed_.query == parsed_.query); - DCHECK(test_url.parsed_.ref == parsed_.ref); - } - } -#endif -} - -GURL::~GURL() { - delete inner_url_; -} - -GURL& GURL::operator=(const GURL& other) { - spec_ = other.spec_; - is_valid_ = other.is_valid_; - parsed_ = other.parsed_; - delete inner_url_; - inner_url_ = NULL; - if (other.inner_url_) - inner_url_ = new GURL(*other.inner_url_); - // Valid filesystem urls should always have an inner_url_. - DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); - return *this; -} - -const std::string& GURL::spec() const { - if (is_valid_ || spec_.empty()) - return spec_; - - DCHECK(false) << "Trying to get the spec of an invalid URL!"; - return EmptyStringForGURL(); -} - -GURL GURL::Resolve(const std::string& relative) const { - return ResolveWithCharsetConverter(relative, NULL); -} -GURL GURL::Resolve(const string16& relative) const { - return ResolveWithCharsetConverter(relative, NULL); -} - -// Note: code duplicated below (it's inconvenient to use a template here). -GURL GURL::ResolveWithCharsetConverter( - const std::string& relative, - url_canon::CharsetConverter* charset_converter) const { - // Not allowed for invalid URLs. - if (!is_valid_) - return GURL(); - - GURL result; - - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); - url_canon::StdStringCanonOutput output(&result.spec_); - - if (!url_util::ResolveRelative( - spec_.data(), static_cast(spec_.length()), parsed_, - relative.data(), static_cast(relative.length()), - charset_converter, &output, &result.parsed_)) { - // Error resolving, return an empty URL. - return GURL(); - } - - output.Complete(); - result.is_valid_ = true; - if (result.SchemeIsFileSystem()) { - result.inner_url_ = new GURL(spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true); - } - return result; -} - -// Note: code duplicated above (it's inconvenient to use a template here). -GURL GURL::ResolveWithCharsetConverter( - const string16& relative, - url_canon::CharsetConverter* charset_converter) const { - // Not allowed for invalid URLs. - if (!is_valid_) - return GURL(); - - GURL result; - - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); - url_canon::StdStringCanonOutput output(&result.spec_); - - if (!url_util::ResolveRelative( - spec_.data(), static_cast(spec_.length()), parsed_, - relative.data(), static_cast(relative.length()), - charset_converter, &output, &result.parsed_)) { - // Error resolving, return an empty URL. - return GURL(); - } - - output.Complete(); - result.is_valid_ = true; - if (result.SchemeIsFileSystem()) { - result.inner_url_ = new GURL(spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true); - } - return result; -} - -// Note: code duplicated below (it's inconvenient to use a template here). -GURL GURL::ReplaceComponents( - const url_canon::Replacements& replacements) const { - GURL result; - - // Not allowed for invalid URLs. - if (!is_valid_) - return GURL(); - - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); - url_canon::StdStringCanonOutput output(&result.spec_); - - result.is_valid_ = url_util::ReplaceComponents( - spec_.data(), static_cast(spec_.length()), parsed_, replacements, - NULL, &output, &result.parsed_); - - output.Complete(); - if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_ = new GURL(spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true); - } - return result; -} - -// Note: code duplicated above (it's inconvenient to use a template here). -GURL GURL::ReplaceComponents( - const url_canon::Replacements& replacements) const { - GURL result; - - // Not allowed for invalid URLs. - if (!is_valid_) - return GURL(); - - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); - url_canon::StdStringCanonOutput output(&result.spec_); - - result.is_valid_ = url_util::ReplaceComponents( - spec_.data(), static_cast(spec_.length()), parsed_, replacements, - NULL, &output, &result.parsed_); - - output.Complete(); - if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_ = new GURL(spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true); - } - return result; -} - -GURL GURL::GetOrigin() const { - // This doesn't make sense for invalid or nonstandard URLs, so return - // the empty URL - if (!is_valid_ || !IsStandard()) - return GURL(); - - if (SchemeIsFileSystem()) - return inner_url_->GetOrigin(); - - url_canon::Replacements replacements; - replacements.ClearUsername(); - replacements.ClearPassword(); - replacements.ClearPath(); - replacements.ClearQuery(); - replacements.ClearRef(); - - return ReplaceComponents(replacements); -} - -GURL GURL::GetWithEmptyPath() const { - // This doesn't make sense for invalid or nonstandard URLs, so return - // the empty URL. - if (!is_valid_ || !IsStandard()) - return GURL(); - - // We could optimize this since we know that the URL is canonical, and we are - // appending a canonical path, so avoiding re-parsing. - GURL other(*this); - if (parsed_.path.len == 0) - return other; - - // Clear everything after the path. - other.parsed_.query.reset(); - other.parsed_.ref.reset(); - - // Set the path, since the path is longer than one, we can just set the - // first character and resize. - other.spec_[other.parsed_.path.begin] = '/'; - other.parsed_.path.len = 1; - other.spec_.resize(other.parsed_.path.begin + 1); - return other; -} - -bool GURL::IsStandard() const { - return url_util::IsStandard(spec_.data(), parsed_.scheme); -} - -bool GURL::SchemeIs(const char* lower_ascii_scheme) const { - if (parsed_.scheme.len <= 0) - return lower_ascii_scheme == NULL; - return url_util::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin, - spec_.data() + parsed_.scheme.end(), - lower_ascii_scheme); -} - -int GURL::IntPort() const { - if (parsed_.port.is_nonempty()) - return url_parse::ParsePort(spec_.data(), parsed_.port); - return url_parse::PORT_UNSPECIFIED; -} - -int GURL::EffectiveIntPort() const { - int int_port = IntPort(); - if (int_port == url_parse::PORT_UNSPECIFIED && IsStandard()) - return url_canon::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin, - parsed_.scheme.len); - return int_port; -} - -std::string GURL::ExtractFileName() const { - url_parse::Component file_component; - url_parse::ExtractFileName(spec_.data(), parsed_.path, &file_component); - return ComponentString(file_component); -} - -std::string GURL::PathForRequest() const { - DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty"; - if (parsed_.ref.len >= 0) { - // Clip off the reference when it exists. The reference starts after the # - // sign, so we have to subtract one to also remove it. - return std::string(spec_, parsed_.path.begin, - parsed_.ref.begin - parsed_.path.begin - 1); - } - // Compute the actual path length, rather than depending on the spec's - // terminator. If we're an inner_url, our spec continues on into our outer - // url's path/query/ref. - int path_len = parsed_.path.len; - if (parsed_.query.is_valid()) - path_len = parsed_.query.end() - parsed_.path.begin; - - return std::string(spec_, parsed_.path.begin, path_len); -} - -std::string GURL::HostNoBrackets() const { - // If host looks like an IPv6 literal, strip the square brackets. - url_parse::Component h(parsed_.host); - if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') { - h.begin++; - h.len -= 2; - } - return ComponentString(h); -} - -bool GURL::HostIsIPAddress() const { - if (!is_valid_ || spec_.empty()) - return false; - - url_canon::RawCanonOutputT ignored_output; - url_canon::CanonHostInfo host_info; - url_canon::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, - &ignored_output, &host_info); - return host_info.IsIPAddress(); -} - -#ifdef WIN32 - -const GURL& GURL::EmptyGURL() { - // Avoid static object construction/destruction on startup/shutdown. - if (!empty_gurl) { - // Create the string. Be careful that we don't break in the case that this - // is being called from multiple threads. - GURL* new_empty_gurl = new GURL; - if (InterlockedCompareExchangePointer( - reinterpret_cast(&empty_gurl), new_empty_gurl, NULL)) { - // The old value was non-NULL, so no replacement was done. Another - // thread did the initialization out from under us. - delete new_empty_gurl; - } - } - return *empty_gurl; -} - -#else - -void EmptyGURLOnce(void) { - empty_gurl = new GURL; -} - -const GURL& GURL::EmptyGURL() { - // Avoid static object construction/destruction on startup/shutdown. - pthread_once(&empty_gurl_once, EmptyGURLOnce); - return *empty_gurl; -} - -#endif // WIN32 - -bool GURL::DomainIs(const char* lower_ascii_domain, - int domain_len) const { - // Return false if this URL is not valid or domain is empty. - if (!is_valid_ || !domain_len) - return false; - - // FileSystem URLs have empty parsed_.host, so check this first. - if (SchemeIsFileSystem() && inner_url_) - return inner_url_->DomainIs(lower_ascii_domain, domain_len); - - if (!parsed_.host.is_nonempty()) - return false; - - // Check whether the host name is end with a dot. If yes, treat it - // the same as no-dot unless the input comparison domain is end - // with dot. - const char* last_pos = spec_.data() + parsed_.host.end() - 1; - int host_len = parsed_.host.len; - if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) { - last_pos--; - host_len--; - } - - // Return false if host's length is less than domain's length. - if (host_len < domain_len) - return false; - - // Compare this url whether belong specific domain. - const char* start_pos = spec_.data() + parsed_.host.begin + - host_len - domain_len; - - if (!url_util::LowerCaseEqualsASCII(start_pos, - last_pos + 1, - lower_ascii_domain, - lower_ascii_domain + domain_len)) - return false; - - // Check whether host has right domain start with dot, make sure we got - // right domain range. For example www.google.com has domain - // "google.com" but www.iamnotgoogle.com does not. - if ('.' != lower_ascii_domain[0] && host_len > domain_len && - '.' != *(start_pos - 1)) - return false; - - return true; -} - -void GURL::Swap(GURL* other) { - spec_.swap(other->spec_); - std::swap(is_valid_, other->is_valid_); - std::swap(parsed_, other->parsed_); - std::swap(inner_url_, other->inner_url_); -} - -std::ostream& operator<<(std::ostream& out, const GURL& url) { - return out << url.possibly_invalid_spec(); -} diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.h.svn-base deleted file mode 100644 index 76c595dba..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl.h.svn-base +++ /dev/null @@ -1,392 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLEURL_SRC_GURL_H__ -#define GOOGLEURL_SRC_GURL_H__ - -#include -#include - -#include "base/string16.h" -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_stdstring.h" -#include "googleurl/src/url_common.h" -#include "googleurl/src/url_parse.h" - -class GURL { - public: - typedef url_canon::StdStringReplacements Replacements; - typedef url_canon::StdStringReplacements ReplacementsW; - - // Creates an empty, invalid URL. - GURL_API GURL(); - - // Copy construction is relatively inexpensive, with most of the time going - // to reallocating the string. It does not re-parse. - GURL_API GURL(const GURL& other); - - // The narrow version requires the input be UTF-8. Invalid UTF-8 input will - // result in an invalid URL. - // - // The wide version should also take an encoding parameter so we know how to - // encode the query parameters. It is probably sufficient for the narrow - // version to assume the query parameter encoding should be the same as the - // input encoding. - GURL_API explicit GURL(const std::string& url_string - /*, output_param_encoding*/); - GURL_API explicit GURL(const string16& url_string - /*, output_param_encoding*/); - - // Constructor for URLs that have already been parsed and canonicalized. This - // is used for conversions from KURL, for example. The caller must supply all - // information associated with the URL, which must be correct and consistent. - GURL_API GURL(const char* canonical_spec, size_t canonical_spec_len, - const url_parse::Parsed& parsed, bool is_valid); - - GURL_API ~GURL(); - - GURL_API GURL& operator=(const GURL& other); - - // Returns true when this object represents a valid parsed URL. When not - // valid, other functions will still succeed, but you will not get canonical - // data out in the format you may be expecting. Instead, we keep something - // "reasonable looking" so that the user can see how it's busted if - // displayed to them. - bool is_valid() const { - return is_valid_; - } - - // Returns true if the URL is zero-length. Note that empty URLs are also - // invalid, and is_valid() will return false for them. This is provided - // because some users may want to treat the empty case differently. - bool is_empty() const { - return spec_.empty(); - } - - // Returns the raw spec, i.e., the full text of the URL, in canonical UTF-8, - // if the URL is valid. If the URL is not valid, this will assert and return - // the empty string (for safety in release builds, to keep them from being - // misused which might be a security problem). - // - // The URL will be ASCII except the reference fragment, which may be UTF-8. - // It is guaranteed to be valid UTF-8. - // - // The exception is for empty() URLs (which are !is_valid()) but this will - // return the empty string without asserting. - // - // Used invalid_spec() below to get the unusable spec of an invalid URL. This - // separation is designed to prevent errors that may cause security problems - // that could result from the mistaken use of an invalid URL. - GURL_API const std::string& spec() const; - - // Returns the potentially invalid spec for a the URL. This spec MUST NOT be - // modified or sent over the network. It is designed to be displayed in error - // messages to the user, as the apperance of the spec may explain the error. - // If the spec is valid, the valid spec will be returned. - // - // The returned string is guaranteed to be valid UTF-8. - const std::string& possibly_invalid_spec() const { - return spec_; - } - - // Getter for the raw parsed structure. This allows callers to locate parts - // of the URL within the spec themselves. Most callers should consider using - // the individual component getters below. - // - // The returned parsed structure will reference into the raw spec, which may - // or may not be valid. If you are using this to index into the spec, BE - // SURE YOU ARE USING possibly_invalid_spec() to get the spec, and that you - // don't do anything "important" with invalid specs. - const url_parse::Parsed& parsed_for_possibly_invalid_spec() const { - return parsed_; - } - - // Defiant equality operator! - bool operator==(const GURL& other) const { - return spec_ == other.spec_; - } - bool operator!=(const GURL& other) const { - return spec_ != other.spec_; - } - - // Allows GURL to used as a key in STL (for example, a std::set or std::map). - bool operator<(const GURL& other) const { - return spec_ < other.spec_; - } - - // Resolves a URL that's possibly relative to this object's URL, and returns - // it. Absolute URLs are also handled according to the rules of URLs on web - // pages. - // - // It may be impossible to resolve the URLs properly. If the input is not - // "standard" (SchemeIsStandard() == false) and the input looks relative, we - // can't resolve it. In these cases, the result will be an empty, invalid - // GURL. - // - // The result may also be a nonempty, invalid URL if the input has some kind - // of encoding error. In these cases, we will try to construct a "good" URL - // that may have meaning to the user, but it will be marked invalid. - // - // It is an error to resolve a URL relative to an invalid URL. The result - // will be the empty URL. - GURL_API GURL Resolve(const std::string& relative) const; - GURL_API GURL Resolve(const string16& relative) const; - - // Like Resolve() above but takes a character set encoder which will be used - // for any query text specified in the input. The charset converter parameter - // may be NULL, in which case it will be treated as UTF-8. - // - // TODO(brettw): These should be replaced with versions that take something - // more friendly than a raw CharsetConverter (maybe like an ICU character set - // name). - GURL_API GURL ResolveWithCharsetConverter( - const std::string& relative, - url_canon::CharsetConverter* charset_converter) const; - GURL_API GURL ResolveWithCharsetConverter( - const string16& relative, - url_canon::CharsetConverter* charset_converter) const; - - // Creates a new GURL by replacing the current URL's components with the - // supplied versions. See the Replacements class in url_canon.h for more. - // - // These are not particularly quick, so avoid doing mutations when possible. - // Prefer the 8-bit version when possible. - // - // It is an error to replace components of an invalid URL. The result will - // be the empty URL. - // - // Note that we use the more general url_canon::Replacements type to give - // callers extra flexibility rather than our override. - GURL_API GURL ReplaceComponents( - const url_canon::Replacements& replacements) const; - GURL_API GURL ReplaceComponents( - const url_canon::Replacements& replacements) const; - - // A helper function that is equivalent to replacing the path with a slash - // and clearing out everything after that. We sometimes need to know just the - // scheme and the authority. If this URL is not a standard URL (it doesn't - // have the regular authority and path sections), then the result will be - // an empty, invalid GURL. Note that this *does* work for file: URLs, which - // some callers may want to filter out before calling this. - // - // It is an error to get an empty path on an invalid URL. The result - // will be the empty URL. - GURL_API GURL GetWithEmptyPath() const; - - // A helper function to return a GURL containing just the scheme, host, - // and port from a URL. Equivalent to clearing any username and password, - // replacing the path with a slash, and clearing everything after that. If - // this URL is not a standard URL, then the result will be an empty, - // invalid GURL. If the URL has neither username nor password, this - // degenerates to GetWithEmptyPath(). - // - // It is an error to get the origin of an invalid URL. The result - // will be the empty URL. - GURL_API GURL GetOrigin() const; - - // Returns true if the scheme for the current URL is a known "standard" - // scheme. Standard schemes have an authority and a path section. This - // includes file: and filesystem:, which some callers may want to filter out - // explicitly by calling SchemeIsFile[System]. - GURL_API bool IsStandard() const; - - // Returns true if the given parameter (should be lower-case ASCII to match - // the canonicalized scheme) is the scheme for this URL. This call is more - // efficient than getting the scheme and comparing it because no copies or - // object constructions are done. - GURL_API bool SchemeIs(const char* lower_ascii_scheme) const; - - // We often need to know if this is a file URL. File URLs are "standard", but - // are often treated separately by some programs. - bool SchemeIsFile() const { - return SchemeIs("file"); - } - - // FileSystem URLs need to be treated differently in some cases. - bool SchemeIsFileSystem() const { - return SchemeIs("filesystem"); - } - - // If the scheme indicates a secure connection - bool SchemeIsSecure() const { - return SchemeIs("https") || SchemeIs("wss") || - (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure()); - } - - // Returns true if the hostname is an IP address. Note: this function isn't - // as cheap as a simple getter because it re-parses the hostname to verify. - // This currently identifies only IPv4 addresses (bug 822685). - GURL_API bool HostIsIPAddress() const; - - // Getters for various components of the URL. The returned string will be - // empty if the component is empty or is not present. - std::string scheme() const { // Not including the colon. See also SchemeIs. - return ComponentString(parsed_.scheme); - } - std::string username() const { - return ComponentString(parsed_.username); - } - std::string password() const { - return ComponentString(parsed_.password); - } - // Note that this may be a hostname, an IPv4 address, or an IPv6 literal - // surrounded by square brackets, like "[2001:db8::1]". To exclude these - // brackets, use HostNoBrackets() below. - std::string host() const { - return ComponentString(parsed_.host); - } - std::string port() const { // Returns -1 if "default" - return ComponentString(parsed_.port); - } - std::string path() const { // Including first slash following host - return ComponentString(parsed_.path); - } - std::string query() const { // Stuff following '?' - return ComponentString(parsed_.query); - } - std::string ref() const { // Stuff following '#' - return ComponentString(parsed_.ref); - } - - // Existance querying. These functions will return true if the corresponding - // URL component exists in this URL. Note that existance is different than - // being nonempty. http://www.google.com/? has a query that just happens to - // be empty, and has_query() will return true. - bool has_scheme() const { - return parsed_.scheme.len >= 0; - } - bool has_username() const { - return parsed_.username.len >= 0; - } - bool has_password() const { - return parsed_.password.len >= 0; - } - bool has_host() const { - // Note that hosts are special, absense of host means length 0. - return parsed_.host.len > 0; - } - bool has_port() const { - return parsed_.port.len >= 0; - } - bool has_path() const { - // Note that http://www.google.com/" has a path, the path is "/". This can - // return false only for invalid or nonstandard URLs. - return parsed_.path.len >= 0; - } - bool has_query() const { - return parsed_.query.len >= 0; - } - bool has_ref() const { - return parsed_.ref.len >= 0; - } - - // Returns a parsed version of the port. Can also be any of the special - // values defined in Parsed for ExtractPort. - GURL_API int IntPort() const; - - // Returns the port number of the url, or the default port number. - // If the scheme has no concept of port (or unknown default) returns - // PORT_UNSPECIFIED. - GURL_API int EffectiveIntPort() const; - - // Extracts the filename portion of the path and returns it. The filename - // is everything after the last slash in the path. This may be empty. - GURL_API std::string ExtractFileName() const; - - // Returns the path that should be sent to the server. This is the path, - // parameter, and query portions of the URL. It is guaranteed to be ASCII. - GURL_API std::string PathForRequest() const; - - // Returns the host, excluding the square brackets surrounding IPv6 address - // literals. This can be useful for passing to getaddrinfo(). - GURL_API std::string HostNoBrackets() const; - - // Returns true if this URL's host matches or is in the same domain as - // the given input string. For example if this URL was "www.google.com", - // this would match "com", "google.com", and "www.google.com - // (input domain should be lower-case ASCII to match the canonicalized - // scheme). This call is more efficient than getting the host and check - // whether host has the specific domain or not because no copies or - // object constructions are done. - // - // If function DomainIs has parameter domain_len, which means the parameter - // lower_ascii_domain does not gurantee to terminate with NULL character. - GURL_API bool DomainIs(const char* lower_ascii_domain, int domain_len) const; - - // If function DomainIs only has parameter lower_ascii_domain, which means - // domain string should be terminate with NULL character. - bool DomainIs(const char* lower_ascii_domain) const { - return DomainIs(lower_ascii_domain, - static_cast(strlen(lower_ascii_domain))); - } - - // Swaps the contents of this GURL object with the argument without doing - // any memory allocations. - GURL_API void Swap(GURL* other); - - // Returns a reference to a singleton empty GURL. This object is for callers - // who return references but don't have anything to return in some cases. - // This function may be called from any thread. - GURL_API static const GURL& EmptyGURL(); - - // Returns the inner URL of a nested URL [currently only non-null for - // filesystem: URLs]. - const GURL* inner_url() const { - return inner_url_; - } - - private: - // Returns the substring of the input identified by the given component. - std::string ComponentString(const url_parse::Component& comp) const { - if (comp.len <= 0) - return std::string(); - return std::string(spec_, comp.begin, comp.len); - } - - // The actual text of the URL, in canonical ASCII form. - std::string spec_; - - // Set when the given URL is valid. Otherwise, we may still have a spec and - // components, but they may not identify valid resources (for example, an - // invalid port number, invalid characters in the scheme, etc.). - bool is_valid_; - - // Identified components of the canonical spec. - url_parse::Parsed parsed_; - - // Used for nested schemes [currently only filesystem:]. - GURL* inner_url_; - - // TODO bug 684583: Add encoding for query params. -}; - -// Stream operator so GURL can be used in assertion statements. -GURL_API std::ostream& operator<<(std::ostream& out, const GURL& url); - -#endif // GOOGLEURL_SRC_GURL_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_test_main.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_test_main.cc.svn-base deleted file mode 100644 index 43f19dfc7..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_test_main.cc.svn-base +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "build/build_config.h" - -#if defined(OS_WIN) -#include -#endif - -#include - -#include "testing/gtest/include/gtest/gtest.h" -#include "unicode/putil.h" -#include "unicode/udata.h" - -#define ICU_UTIL_DATA_SHARED 1 -#define ICU_UTIL_DATA_STATIC 2 - -#ifndef ICU_UTIL_DATA_IMPL - -#if defined(OS_WIN) -#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_SHARED -#elif defined(OS_MACOSX) -#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_STATIC -#elif defined(OS_LINUX) -#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_FILE -#endif - -#endif // ICU_UTIL_DATA_IMPL - -#if defined(OS_WIN) -#define ICU_UTIL_DATA_SYMBOL "icudt" U_ICU_VERSION_SHORT "_dat" -#define ICU_UTIL_DATA_SHARED_MODULE_NAME "icudt" U_ICU_VERSION_SHORT ".dll" -#endif - -bool InitializeICU() { -#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED) - // We expect to find the ICU data module alongside the current module. - // Because the module name is ASCII-only, "A" API should be safe. - // Chrome's copy of ICU dropped a version number XX from icudt dll, - // but 3rd-party embedders may need it. So, we try both. - HMODULE module = LoadLibraryA("icudt.dll"); - if (!module) { - module = LoadLibraryA(ICU_UTIL_DATA_SHARED_MODULE_NAME); - if (!module) - return false; - } - - FARPROC addr = GetProcAddress(module, ICU_UTIL_DATA_SYMBOL); - if (!addr) - return false; - - UErrorCode err = U_ZERO_ERROR; - udata_setCommonData(reinterpret_cast(addr), &err); - return err == U_ZERO_ERROR; -#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC) - // Mac bundles the ICU data in. - return true; -#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) - // We expect to find the ICU data module alongside the current module. - u_setDataDirectory("."); - // Only look for the packaged data file; - // the default behavior is to look for individual files. - UErrorCode err = U_ZERO_ERROR; - udata_setFileAccess(UDATA_ONLY_PACKAGES, &err); - return err == U_ZERO_ERROR; -#endif -} - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - - InitializeICU(); - - return RUN_ALL_TESTS(); -} diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_unittest.cc.svn-base deleted file mode 100644 index 670d2dffa..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/gurl_unittest.cc.svn-base +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright 2007 Google Inc. All Rights Reserved. -// Author: brettw@google.com (Brett Wilson) - -#include "googleurl/src/gurl.h" -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_test_utils.h" -#include "testing/gtest/include/gtest/gtest.h" - -// Some implementations of base/basictypes.h may define ARRAYSIZE. -// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro -// which is in our version of basictypes.h. -#ifndef ARRAYSIZE -#define ARRAYSIZE ARRAYSIZE_UNSAFE -#endif - -using url_test_utils::WStringToUTF16; -using url_test_utils::ConvertUTF8ToUTF16; - -namespace { - -template -void SetupReplacement(void (url_canon::Replacements::*func)(const CHAR*, - const url_parse::Component&), - url_canon::Replacements* replacements, - const CHAR* str) { - if (str) { - url_parse::Component comp; - if (str[0]) - comp.len = static_cast(strlen(str)); - (replacements->*func)(str, comp); - } -} - -// Returns the canonicalized string for the given URL string for the -// GURLTest.Types test. -std::string TypesTestCase(const char* src) { - GURL gurl(src); - return gurl.possibly_invalid_spec(); -} - -} // namespace - -// Different types of URLs should be handled differently by url_util, and -// handed off to different canonicalizers. -TEST(GURLTest, Types) { - // URLs with unknown schemes should be treated as path URLs, even when they - // have things like "://". - EXPECT_EQ("something:///HOSTNAME.com/", - TypesTestCase("something:///HOSTNAME.com/")); - - // In the reverse, known schemes should always trigger standard URL handling. - EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com")); - EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com")); - EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com")); - EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com")); - -#ifdef WIN32 - // URLs that look like absolute Windows drive specs. - EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt")); - EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt")); - EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt")); - EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt")); -#endif -} - -// Test the basic creation and querying of components in a GURL. We assume -// the parser is already tested and works, so we are mostly interested if the -// object does the right thing with the results. -TEST(GURLTest, Components) { - GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); - EXPECT_TRUE(url.is_valid()); - EXPECT_TRUE(url.SchemeIs("http")); - EXPECT_FALSE(url.SchemeIsFile()); - - // This is the narrow version of the URL, which should match the wide input. - EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec()); - - EXPECT_EQ("http", url.scheme()); - EXPECT_EQ("user", url.username()); - EXPECT_EQ("pass", url.password()); - EXPECT_EQ("google.com", url.host()); - EXPECT_EQ("99", url.port()); - EXPECT_EQ(99, url.IntPort()); - EXPECT_EQ("/foo;bar", url.path()); - EXPECT_EQ("q=a", url.query()); - EXPECT_EQ("ref", url.ref()); -} - -TEST(GURLTest, Empty) { - GURL url; - EXPECT_FALSE(url.is_valid()); - EXPECT_EQ("", url.spec()); - - EXPECT_EQ("", url.scheme()); - EXPECT_EQ("", url.username()); - EXPECT_EQ("", url.password()); - EXPECT_EQ("", url.host()); - EXPECT_EQ("", url.port()); - EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort()); - EXPECT_EQ("", url.path()); - EXPECT_EQ("", url.query()); - EXPECT_EQ("", url.ref()); -} - -TEST(GURLTest, Copy) { - GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); - - GURL url2(url); - EXPECT_TRUE(url2.is_valid()); - - EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec()); - EXPECT_EQ("http", url2.scheme()); - EXPECT_EQ("user", url2.username()); - EXPECT_EQ("pass", url2.password()); - EXPECT_EQ("google.com", url2.host()); - EXPECT_EQ("99", url2.port()); - EXPECT_EQ(99, url2.IntPort()); - EXPECT_EQ("/foo;bar", url2.path()); - EXPECT_EQ("q=a", url2.query()); - EXPECT_EQ("ref", url2.ref()); - - // Copying of invalid URL should be invalid - GURL invalid; - GURL invalid2(invalid); - EXPECT_FALSE(invalid2.is_valid()); - EXPECT_EQ("", invalid2.spec()); - EXPECT_EQ("", invalid2.scheme()); - EXPECT_EQ("", invalid2.username()); - EXPECT_EQ("", invalid2.password()); - EXPECT_EQ("", invalid2.host()); - EXPECT_EQ("", invalid2.port()); - EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort()); - EXPECT_EQ("", invalid2.path()); - EXPECT_EQ("", invalid2.query()); - EXPECT_EQ("", invalid2.ref()); -} - -TEST(GURLTest, CopyFileSystem) { - GURL url(WStringToUTF16(L"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref")); - - GURL url2(url); - EXPECT_TRUE(url2.is_valid()); - - EXPECT_EQ("filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref", url2.spec()); - EXPECT_EQ("filesystem", url2.scheme()); - EXPECT_EQ("", url2.username()); - EXPECT_EQ("", url2.password()); - EXPECT_EQ("", url2.host()); - EXPECT_EQ("", url2.port()); - EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url2.IntPort()); - EXPECT_EQ("/foo;bar", url2.path()); - EXPECT_EQ("q=a", url2.query()); - EXPECT_EQ("ref", url2.ref()); - - const GURL* inner = url2.inner_url(); - ASSERT_TRUE(inner); - EXPECT_EQ("https", inner->scheme()); - EXPECT_EQ("user", inner->username()); - EXPECT_EQ("pass", inner->password()); - EXPECT_EQ("google.com", inner->host()); - EXPECT_EQ("99", inner->port()); - EXPECT_EQ(99, inner->IntPort()); - EXPECT_EQ("/t", inner->path()); - EXPECT_EQ("", inner->query()); - EXPECT_EQ("", inner->ref()); -} - -// Given an invalid URL, we should still get most of the components. -TEST(GURLTest, Invalid) { - GURL url("http:google.com:foo"); - EXPECT_FALSE(url.is_valid()); - EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); - - EXPECT_EQ("http", url.scheme()); - EXPECT_EQ("", url.username()); - EXPECT_EQ("", url.password()); - EXPECT_EQ("google.com", url.host()); - EXPECT_EQ("foo", url.port()); - EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort()); - EXPECT_EQ("/", url.path()); - EXPECT_EQ("", url.query()); - EXPECT_EQ("", url.ref()); -} - -TEST(GURLTest, Resolve) { - // The tricky cases for relative URL resolving are tested in the - // canonicalizer unit test. Here, we just test that the GURL integration - // works properly. - struct ResolveCase { - const char* base; - const char* relative; - bool expected_valid; - const char* expected; - } resolve_cases[] = { - {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"}, - {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, - {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, - {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, - {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, - // A non-standard base can be replaced with a standard absolute URL. - {"data:blahblah", "http://google.com/", true, "http://google.com/"}, - {"data:blahblah", "http:google.com", true, "http://google.com/"}, - // Filesystem URLs have different paths to test. - {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, - {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) { - // 8-bit code path. - GURL input(resolve_cases[i].base); - GURL output = input.Resolve(resolve_cases[i].relative); - EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i; - EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i; - EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL); - - // Wide code path. - GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base)); - GURL outputw = - input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative)); - EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i; - EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i; - EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL); - } -} - -TEST(GURLTest, GetOrigin) { - struct TestCase { - const char* input; - const char* expected; - } cases[] = { - {"http://www.google.com", "http://www.google.com/"}, - {"javascript:window.alert(\"hello,world\");", ""}, - {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"}, - {"http://user@www.google.com", "http://www.google.com/"}, - {"http://:pass@www.google.com", "http://www.google.com/"}, - {"http://:@www.google.com", "http://www.google.com/"}, - {"filesystem:http://www.google.com/temp/foo?q#b", "http://www.google.com/"}, - {"filesystem:http://user:pass@google.com:21/blah#baz", "http://google.com:21/"}, - }; - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - GURL url(cases[i].input); - GURL origin = url.GetOrigin(); - EXPECT_EQ(cases[i].expected, origin.spec()); - } -} - -TEST(GURLTest, GetWithEmptyPath) { - struct TestCase { - const char* input; - const char* expected; - } cases[] = { - {"http://www.google.com", "http://www.google.com/"}, - {"javascript:window.alert(\"hello, world\");", ""}, - {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"}, - {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"}, - {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - GURL url(cases[i].input); - GURL empty_path = url.GetWithEmptyPath(); - EXPECT_EQ(cases[i].expected, empty_path.spec()); - } -} - -TEST(GURLTest, Replacements) { - // The url canonicalizer replacement test will handle most of these case. - // The most important thing to do here is to check that the proper - // canonicalizer gets called based on the scheme of the input. - struct ReplaceCase { - const char* base; - const char* scheme; - const char* username; - const char* password; - const char* host; - const char* port; - const char* path; - const char* query; - const char* ref; - const char* expected; - } replace_cases[] = { - {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"}, - {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"}, - {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"}, -#ifdef WIN32 - {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"}, -#endif - {"filesystem:http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "filesystem:http://www.google.com/foo/"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - GURL url(cur.base); - GURL::Replacements repl; - SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme); - SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username); - SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password); - SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host); - SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port); - SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path); - SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query); - SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref); - GURL output = url.ReplaceComponents(repl); - - EXPECT_EQ(replace_cases[i].expected, output.spec()); - EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL); - } -} - -TEST(GURLTest, PathForRequest) { - struct TestCase { - const char* input; - const char* expected; - const char* inner_expected; - } cases[] = { - {"http://www.google.com", "/", NULL}, - {"http://www.google.com/", "/", NULL}, - {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", NULL}, - {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", NULL}, - {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", NULL}, - {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", "/foo/bar.html?query", "/temporary"}, - {"filesystem:http://www.google.com/temporary/foo/bar.html?query", "/foo/bar.html?query", "/temporary"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - GURL url(cases[i].input); - std::string path_request = url.PathForRequest(); - EXPECT_EQ(cases[i].expected, path_request); - EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL); - if (url.inner_url() && cases[i].inner_expected) - EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest()); - } -} - -TEST(GURLTest, EffectiveIntPort) { - struct PortTest { - const char* spec; - int expected_int_port; - } port_tests[] = { - // http - {"http://www.google.com/", 80}, - {"http://www.google.com:80/", 80}, - {"http://www.google.com:443/", 443}, - - // https - {"https://www.google.com/", 443}, - {"https://www.google.com:443/", 443}, - {"https://www.google.com:80/", 80}, - - // ftp - {"ftp://www.google.com/", 21}, - {"ftp://www.google.com:21/", 21}, - {"ftp://www.google.com:80/", 80}, - - // gopher - {"gopher://www.google.com/", 70}, - {"gopher://www.google.com:70/", 70}, - {"gopher://www.google.com:80/", 80}, - - // file - no port - {"file://www.google.com/", url_parse::PORT_UNSPECIFIED}, - {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED}, - - // data - no port - {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED}, - {"data:www.google.com", url_parse::PORT_UNSPECIFIED}, - - // filesystem - no port - {"filesystem:http://www.google.com:90/t/foo", url_parse::PORT_UNSPECIFIED}, - {"filesystem:file:///t/foo", url_parse::PORT_UNSPECIFIED}, - }; - - for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) { - GURL url(port_tests[i].spec); - EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort()); - } -} - -TEST(GURLTest, IPAddress) { - struct IPTest { - const char* spec; - bool expected_ip; - } ip_tests[] = { - {"http://www.google.com/", false}, - {"http://192.168.9.1/", true}, - {"http://192.168.9.1.2/", false}, - {"http://192.168.m.1/", false}, - {"http://2001:db8::1/", false}, - {"http://[2001:db8::1]/", true}, - {"", false}, - {"some random input!", false}, - }; - - for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) { - GURL url(ip_tests[i].spec); - EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress()); - } -} - -TEST(GURLTest, HostNoBrackets) { - struct TestCase { - const char* input; - const char* expected_host; - const char* expected_plainhost; - } cases[] = { - {"http://www.google.com", "www.google.com", "www.google.com"}, - {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"}, - {"http://[::]/", "[::]", "::"}, - - // Don't require a valid URL, but don't crash either. - {"http://[]/", "[]", ""}, - {"http://[x]/", "[x]", "x"}, - {"http://[x/", "[x", "[x"}, - {"http://x]/", "x]", "x]"}, - {"http://[/", "[", "["}, - {"http://]/", "]", "]"}, - {"", "", ""}, - }; - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - GURL url(cases[i].input); - EXPECT_EQ(cases[i].expected_host, url.host()); - EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets()); - } -} - -TEST(GURLTest, DomainIs) { - const char google_domain[] = "google.com"; - - GURL url_1("http://www.google.com:99/foo"); - EXPECT_TRUE(url_1.DomainIs(google_domain)); - - GURL url_2("http://google.com:99/foo"); - EXPECT_TRUE(url_2.DomainIs(google_domain)); - - GURL url_3("http://google.com./foo"); - EXPECT_TRUE(url_3.DomainIs(google_domain)); - - GURL url_4("http://google.com/foo"); - EXPECT_FALSE(url_4.DomainIs("google.com.")); - - GURL url_5("http://google.com./foo"); - EXPECT_TRUE(url_5.DomainIs("google.com.")); - - GURL url_6("http://www.google.com./foo"); - EXPECT_TRUE(url_6.DomainIs(".com.")); - - GURL url_7("http://www.balabala.com/foo"); - EXPECT_FALSE(url_7.DomainIs(google_domain)); - - GURL url_8("http://www.google.com.cn/foo"); - EXPECT_FALSE(url_8.DomainIs(google_domain)); - - GURL url_9("http://www.iamnotgoogle.com/foo"); - EXPECT_FALSE(url_9.DomainIs(google_domain)); - - GURL url_10("http://www.iamnotgoogle.com../foo"); - EXPECT_FALSE(url_10.DomainIs(".com")); - - GURL url_11("filesystem:http://www.google.com:99/foo/"); - EXPECT_TRUE(url_11.DomainIs(google_domain)); - - GURL url_12("filesystem:http://www.iamnotgoogle.com/foo/"); - EXPECT_FALSE(url_12.DomainIs(google_domain)); -} - -// Newlines should be stripped from inputs. -TEST(GURLTest, Newlines) { - // Constructor. - GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n "); - EXPECT_EQ("http://www.google.com/asdf", url_1.spec()); - - // Relative path resolver. - GURL url_2 = url_1.Resolve(" \n /fo\to\r "); - EXPECT_EQ("http://www.google.com/foo", url_2.spec()); - - // Note that newlines are NOT stripped from ReplaceComponents. -} - -TEST(GURLTest, IsStandard) { - GURL a("http:foo/bar"); - EXPECT_TRUE(a.IsStandard()); - - GURL b("foo:bar/baz"); - EXPECT_FALSE(b.IsStandard()); - - GURL c("foo://bar/baz"); - EXPECT_FALSE(c.IsStandard()); -} diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon.h.svn-base deleted file mode 100644 index 00ae715af..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon.h.svn-base +++ /dev/null @@ -1,912 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#ifndef GOOGLEURL_SRC_URL_CANON_H__ -#define GOOGLEURL_SRC_URL_CANON_H__ - -#include -#include - -#include "base/string16.h" -#include "googleurl/src/url_common.h" -#include "googleurl/src/url_parse.h" - -namespace url_canon { - -// Canonicalizer output ------------------------------------------------------- - -// Base class for the canonicalizer output, this maintains a buffer and -// supports simple resizing and append operations on it. -// -// It is VERY IMPORTANT that no virtual function calls be made on the common -// code path. We only have two virtual function calls, the destructor and a -// resize function that is called when the existing buffer is not big enough. -// The derived class is then in charge of setting up our buffer which we will -// manage. -template -class CanonOutputT { - public: - CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) { - } - virtual ~CanonOutputT() { - } - - // Implemented to resize the buffer. This function should update the buffer - // pointer to point to the new buffer, and any old data up to |cur_len_| in - // the buffer must be copied over. - // - // The new size |sz| must be larger than buffer_len_. - virtual void Resize(int sz) = 0; - - // Accessor for returning a character at a given position. The input offset - // must be in the valid range. - inline char at(int offset) const { - return buffer_[offset]; - } - - // Sets the character at the given position. The given position MUST be less - // than the length(). - inline void set(int offset, int ch) { - buffer_[offset] = ch; - } - - // Returns the number of characters currently in the buffer. - inline int length() const { - return cur_len_; - } - - // Returns the current capacity of the buffer. The length() is the number of - // characters that have been declared to be written, but the capacity() is - // the number that can be written without reallocation. If the caller must - // write many characters at once, it can make sure there is enough capacity, - // write the data, then use set_size() to declare the new length(). - int capacity() const { - return buffer_len_; - } - - // Called by the user of this class to get the output. The output will NOT - // be NULL-terminated. Call length() to get the - // length. - const T* data() const { - return buffer_; - } - T* data() { - return buffer_; - } - - // Shortens the URL to the new length. Used for "backing up" when processing - // relative paths. This can also be used if an external function writes a lot - // of data to the buffer (when using the "Raw" version below) beyond the end, - // to declare the new length. - // - // This MUST NOT be used to expand the size of the buffer beyond capacity(). - void set_length(int new_len) { - cur_len_ = new_len; - } - - // This is the most performance critical function, since it is called for - // every character. - void push_back(T ch) { - // In VC2005, putting this common case first speeds up execution - // dramatically because this branch is predicted as taken. - if (cur_len_ < buffer_len_) { - buffer_[cur_len_] = ch; - cur_len_++; - return; - } - - // Grow the buffer to hold at least one more item. Hopefully we won't have - // to do this very often. - if (!Grow(1)) - return; - - // Actually do the insertion. - buffer_[cur_len_] = ch; - cur_len_++; - } - - // Appends the given string to the output. - void Append(const T* str, int str_len) { - if (cur_len_ + str_len > buffer_len_) { - if (!Grow(cur_len_ + str_len - buffer_len_)) - return; - } - for (int i = 0; i < str_len; i++) - buffer_[cur_len_ + i] = str[i]; - cur_len_ += str_len; - } - - protected: - // Grows the given buffer so that it can fit at least |min_additional| - // characters. Returns true if the buffer could be resized, false on OOM. - bool Grow(int min_additional) { - static const int kMinBufferLen = 16; - int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_; - do { - if (new_len >= (1 << 30)) // Prevent overflow below. - return false; - new_len *= 2; - } while (new_len < buffer_len_ + min_additional); - Resize(new_len); - return true; - } - - T* buffer_; - int buffer_len_; - - // Used characters in the buffer. - int cur_len_; -}; - -// Simple implementation of the CanonOutput using new[]. This class -// also supports a static buffer so if it is allocated on the stack, most -// URLs can be canonicalized with no heap allocations. -template -class RawCanonOutputT : public CanonOutputT { - public: - RawCanonOutputT() : CanonOutputT() { - this->buffer_ = fixed_buffer_; - this->buffer_len_ = fixed_capacity; - } - virtual ~RawCanonOutputT() { - if (this->buffer_ != fixed_buffer_) - delete[] this->buffer_; - } - - virtual void Resize(int sz) { - T* new_buf = new T[sz]; - memcpy(new_buf, this->buffer_, - sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz)); - if (this->buffer_ != fixed_buffer_) - delete[] this->buffer_; - this->buffer_ = new_buf; - this->buffer_len_ = sz; - } - - protected: - T fixed_buffer_[fixed_capacity]; -}; - -// Normally, all canonicalization output is in narrow characters. We support -// the templates so it can also be used internally if a wide buffer is -// required. -typedef CanonOutputT CanonOutput; -typedef CanonOutputT CanonOutputW; - -template -class RawCanonOutput : public RawCanonOutputT {}; -template -class RawCanonOutputW : public RawCanonOutputT {}; - -// Character set converter ---------------------------------------------------- -// -// Converts query strings into a custom encoding. The embedder can supply an -// implementation of this class to interface with their own character set -// conversion libraries. -// -// Embedders will want to see the unit test for the ICU version. - -class CharsetConverter { - public: - CharsetConverter() {} - virtual ~CharsetConverter() {} - - // Converts the given input string from UTF-16 to whatever output format the - // converter supports. This is used only for the query encoding conversion, - // which does not fail. Instead, the converter should insert "invalid - // character" characters in the output for invalid sequences, and do the - // best it can. - // - // If the input contains a character not representable in the output - // character set, the converter should append the HTML entity sequence in - // decimal, (such as "你") with escaping of the ampersand, number - // sign, and semicolon (in the previous example it would be - // "%26%2320320%3B"). This rule is based on what IE does in this situation. - virtual void ConvertFromUTF16(const char16* input, - int input_len, - CanonOutput* output) = 0; -}; - -// Whitespace ----------------------------------------------------------------- - -// Searches for whitespace that should be removed from the middle of URLs, and -// removes it. Removed whitespace are tabs and newlines, but NOT spaces. Spaces -// are preserved, which is what most browsers do. A pointer to the output will -// be returned, and the length of that output will be in |output_len|. -// -// This should be called before parsing if whitespace removal is desired (which -// it normally is when you are canonicalizing). -// -// If no whitespace is removed, this function will not use the buffer and will -// return a pointer to the input, to avoid the extra copy. If modification is -// required, the given |buffer| will be used and the returned pointer will -// point to the beginning of the buffer. -// -// Therefore, callers should not use the buffer, since it may actuall be empty, -// use the computed pointer and |*output_len| instead. -GURL_API const char* RemoveURLWhitespace(const char* input, int input_len, - CanonOutputT* buffer, - int* output_len); -GURL_API const char16* RemoveURLWhitespace(const char16* input, int input_len, - CanonOutputT* buffer, - int* output_len); - -// IDN ------------------------------------------------------------------------ - -// Converts the Unicode input representing a hostname to ASCII using IDN rules. -// The output must fall in the ASCII range, but will be encoded in UTF-16. -// -// On success, the output will be filled with the ASCII host name and it will -// return true. Unlike most other canonicalization functions, this assumes that -// the output is empty. The beginning of the host will be at offset 0, and -// the length of the output will be set to the length of the new host name. -// -// On error, returns false. The output in this case is undefined. -GURL_API bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output); - -// Piece-by-piece canonicalizers ---------------------------------------------- -// -// These individual canonicalizers append the canonicalized versions of the -// corresponding URL component to the given std::string. The spec and the -// previously-identified range of that component are the input. The range of -// the canonicalized component will be written to the output component. -// -// These functions all append to the output so they can be chained. Make sure -// the output is empty when you start. -// -// These functions returns boolean values indicating success. On failure, they -// will attempt to write something reasonable to the output so that, if -// displayed to the user, they will recognise it as something that's messed up. -// Nothing more should ever be done with these invalid URLs, however. - -// Scheme: Appends the scheme and colon to the URL. The output component will -// indicate the range of characters up to but not including the colon. -// -// Canonical URLs always have a scheme. If the scheme is not present in the -// input, this will just write the colon to indicate an empty scheme. Does not -// append slashes which will be needed before any authority components for most -// URLs. -// -// The 8-bit version requires UTF-8 encoding. -GURL_API bool CanonicalizeScheme(const char* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme); -GURL_API bool CanonicalizeScheme(const char16* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme); - -// User info: username/password. If present, this will add the delimiters so -// the output will be ":@" or "@". Empty -// username/password pairs, or empty passwords, will get converted to -// nonexistant in the canonical version. -// -// The components for the username and password refer to ranges in the -// respective source strings. Usually, these will be the same string, which -// is legal as long as the two components don't overlap. -// -// The 8-bit version requires UTF-8 encoding. -GURL_API bool CanonicalizeUserInfo(const char* username_source, - const url_parse::Component& username, - const char* password_source, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password); -GURL_API bool CanonicalizeUserInfo(const char16* username_source, - const url_parse::Component& username, - const char16* password_source, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password); - - -// This structure holds detailed state exported from the IP/Host canonicalizers. -// Additional fields may be added as callers require them. -struct CanonHostInfo { - CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {} - - // Convenience function to test if family is an IP address. - bool IsIPAddress() const { return family == IPV4 || family == IPV6; } - - // This field summarizes how the input was classified by the canonicalizer. - enum Family { - NEUTRAL, // - Doesn't resemble an IP address. As far as the IP - // canonicalizer is concerned, it should be treated as a - // hostname. - BROKEN, // - Almost an IP, but was not canonicalized. This could be an - // IPv4 address where truncation occurred, or something - // containing the special characters :[] which did not parse - // as an IPv6 address. Never attempt to connect to this - // address, because it might actually succeed! - IPV4, // - Successfully canonicalized as an IPv4 address. - IPV6, // - Successfully canonicalized as an IPv6 address. - }; - Family family; - - // If |family| is IPV4, then this is the number of nonempty dot-separated - // components in the input text, from 1 to 4. If |family| is not IPV4, - // this value is undefined. - int num_ipv4_components; - - // Location of host within the canonicalized output. - // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6. - // CanonicalizeHostVerbose() always sets it. - url_parse::Component out_host; - - // |address| contains the parsed IP Address (if any) in its first - // AddressLength() bytes, in network order. If IsIPAddress() is false - // AddressLength() will return zero and the content of |address| is undefined. - unsigned char address[16]; - - // Convenience function to calculate the length of an IP address corresponding - // to the current IP version in |family|, if any. For use with |address|. - int AddressLength() const { - return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0); - } -}; - - -// Host. -// -// The 8-bit version requires UTF-8 encoding. Use this version when you only -// need to know whether canonicalization succeeded. -GURL_API bool CanonicalizeHost(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - url_parse::Component* out_host); -GURL_API bool CanonicalizeHost(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - url_parse::Component* out_host); - -// Extended version of CanonicalizeHost, which returns additional information. -// Use this when you need to know whether the hostname was an IP address. -// A successful return is indicated by host_info->family != BROKEN. See the -// definition of CanonHostInfo above for details. -GURL_API void CanonicalizeHostVerbose(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); -GURL_API void CanonicalizeHostVerbose(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); - - -// IP addresses. -// -// Tries to interpret the given host name as an IPv4 or IPv6 address. If it is -// an IP address, it will canonicalize it as such, appending it to |output|. -// Additional status information is returned via the |*host_info| parameter. -// See the definition of CanonHostInfo above for details. -// -// This is called AUTOMATICALLY from the host canonicalizer, which ensures that -// the input is unescaped and name-prepped, etc. It should not normally be -// necessary or wise to call this directly. -GURL_API void CanonicalizeIPAddress(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); -GURL_API void CanonicalizeIPAddress(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); - -// Port: this function will add the colon for the port if a port is present. -// The caller can pass url_parse::PORT_UNSPECIFIED as the -// default_port_for_scheme argument if there is no default port. -// -// The 8-bit version requires UTF-8 encoding. -GURL_API bool CanonicalizePort(const char* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port); -GURL_API bool CanonicalizePort(const char16* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port); - -// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED -// if the scheme is unknown. -GURL_API int DefaultPortForScheme(const char* scheme, int scheme_len); - -// Path. If the input does not begin in a slash (including if the input is -// empty), we'll prepend a slash to the path to make it canonical. -// -// The 8-bit version assumes UTF-8 encoding, but does not verify the validity -// of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid -// characters, etc.). Normally, URLs will come in as UTF-16, so this isn't -// an issue. Somebody giving us an 8-bit path is responsible for generating -// the path that the server expects (we'll escape high-bit characters), so -// if something is invalid, it's their problem. -GURL_API bool CanonicalizePath(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); -GURL_API bool CanonicalizePath(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); - -// Canonicalizes the input as a file path. This is like CanonicalizePath except -// that it also handles Windows drive specs. For example, the path can begin -// with "c|\" and it will get properly canonicalized to "C:/". -// The string will be appended to |*output| and |*out_path| will be updated. -// -// The 8-bit version requires UTF-8 encoding. -GURL_API bool FileCanonicalizePath(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); -GURL_API bool FileCanonicalizePath(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); - -// Query: Prepends the ? if needed. -// -// The 8-bit version requires the input to be UTF-8 encoding. Incorrectly -// encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode -// "invalid character." This function can not fail, we always just try to do -// our best for crazy input here since web pages can set it themselves. -// -// This will convert the given input into the output encoding that the given -// character set converter object provides. The converter will only be called -// if necessary, for ASCII input, no conversions are necessary. -// -// The converter can be NULL. In this case, the output encoding will be UTF-8. -GURL_API void CanonicalizeQuery(const char* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query); -GURL_API void CanonicalizeQuery(const char16* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query); - -// Ref: Prepends the # if needed. The output will be UTF-8 (this is the only -// canonicalizer that does not produce ASCII output). The output is -// guaranteed to be valid UTF-8. -// -// This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use -// the "Unicode replacement character" for the confusing bits and copy the rest. -GURL_API void CanonicalizeRef(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); -GURL_API void CanonicalizeRef(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); - -// Full canonicalizer --------------------------------------------------------- -// -// These functions replace any string contents, rather than append as above. -// See the above piece-by-piece functions for information specific to -// canonicalizing individual components. -// -// The output will be ASCII except the reference fragment, which may be UTF-8. -// -// The 8-bit versions require UTF-8 encoding. - -// Use for standard URLs with authorities and paths. -GURL_API bool CanonicalizeStandardURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool CanonicalizeStandardURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Use for file URLs. -GURL_API bool CanonicalizeFileURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool CanonicalizeFileURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Use for filesystem URLs. -GURL_API bool CanonicalizeFileSystemURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool CanonicalizeFileSystemURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Use for path URLs such as javascript. This does not modify the path in any -// way, for example, by escaping it. -GURL_API bool CanonicalizePathURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool CanonicalizePathURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Use for mailto URLs. This "canonicalizes" the url into a path and query -// component. It does not attempt to merge "to" fields. It uses UTF-8 for -// the query encoding if there is a query. This is because a mailto URL is -// really intended for an external mail program, and the encoding of a page, -// etc. which would influence a query encoding normally are irrelevant. -GURL_API bool CanonicalizeMailtoURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool CanonicalizeMailtoURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Part replacer -------------------------------------------------------------- - -// Internal structure used for storing separate strings for each component. -// The basic canonicalization functions use this structure internally so that -// component replacement (different strings for different components) can be -// treated on the same code path as regular canonicalization (the same string -// for each component). -// -// A url_parse::Parsed structure usually goes along with this. Those -// components identify offsets within these strings, so that they can all be -// in the same string, or spread arbitrarily across different ones. -// -// This structures does not own any data. It is the caller's responsibility to -// ensure that the data the pointers point to stays in scope and is not -// modified. -template -struct URLComponentSource { - // Constructor normally used by callers wishing to replace components. This - // will make them all NULL, which is no replacement. The caller would then - // override the components they want to replace. - URLComponentSource() - : scheme(NULL), - username(NULL), - password(NULL), - host(NULL), - port(NULL), - path(NULL), - query(NULL), - ref(NULL) { - } - - // Constructor normally used internally to initialize all the components to - // point to the same spec. - explicit URLComponentSource(const CHAR* default_value) - : scheme(default_value), - username(default_value), - password(default_value), - host(default_value), - port(default_value), - path(default_value), - query(default_value), - ref(default_value) { - } - - const CHAR* scheme; - const CHAR* username; - const CHAR* password; - const CHAR* host; - const CHAR* port; - const CHAR* path; - const CHAR* query; - const CHAR* ref; -}; - -// This structure encapsulates information on modifying a URL. Each component -// may either be left unchanged, replaced, or deleted. -// -// By default, each component is unchanged. For those components that should be -// modified, call either Set* or Clear* to modify it. -// -// The string passed to Set* functions DOES NOT GET COPIED AND MUST BE KEPT -// IN SCOPE BY THE CALLER for as long as this object exists! -// -// Prefer the 8-bit replacement version if possible since it is more efficient. -template -class Replacements { - public: - Replacements() { - } - - // Scheme - void SetScheme(const CHAR* s, const url_parse::Component& comp) { - sources_.scheme = s; - components_.scheme = comp; - } - // Note: we don't have a ClearScheme since this doesn't make any sense. - bool IsSchemeOverridden() const { return sources_.scheme != NULL; } - - // Username - void SetUsername(const CHAR* s, const url_parse::Component& comp) { - sources_.username = s; - components_.username = comp; - } - void ClearUsername() { - sources_.username = Placeholder(); - components_.username = url_parse::Component(); - } - bool IsUsernameOverridden() const { return sources_.username != NULL; } - - // Password - void SetPassword(const CHAR* s, const url_parse::Component& comp) { - sources_.password = s; - components_.password = comp; - } - void ClearPassword() { - sources_.password = Placeholder(); - components_.password = url_parse::Component(); - } - bool IsPasswordOverridden() const { return sources_.password != NULL; } - - // Host - void SetHost(const CHAR* s, const url_parse::Component& comp) { - sources_.host = s; - components_.host = comp; - } - void ClearHost() { - sources_.host = Placeholder(); - components_.host = url_parse::Component(); - } - bool IsHostOverridden() const { return sources_.host != NULL; } - - // Port - void SetPort(const CHAR* s, const url_parse::Component& comp) { - sources_.port = s; - components_.port = comp; - } - void ClearPort() { - sources_.port = Placeholder(); - components_.port = url_parse::Component(); - } - bool IsPortOverridden() const { return sources_.port != NULL; } - - // Path - void SetPath(const CHAR* s, const url_parse::Component& comp) { - sources_.path = s; - components_.path = comp; - } - void ClearPath() { - sources_.path = Placeholder(); - components_.path = url_parse::Component(); - } - bool IsPathOverridden() const { return sources_.path != NULL; } - - // Query - void SetQuery(const CHAR* s, const url_parse::Component& comp) { - sources_.query = s; - components_.query = comp; - } - void ClearQuery() { - sources_.query = Placeholder(); - components_.query = url_parse::Component(); - } - bool IsQueryOverridden() const { return sources_.query != NULL; } - - // Ref - void SetRef(const CHAR* s, const url_parse::Component& comp) { - sources_.ref = s; - components_.ref = comp; - } - void ClearRef() { - sources_.ref = Placeholder(); - components_.ref = url_parse::Component(); - } - bool IsRefOverridden() const { return sources_.ref != NULL; } - - // Getters for the itnernal data. See the variables below for how the - // information is encoded. - const URLComponentSource& sources() const { return sources_; } - const url_parse::Parsed& components() const { return components_; } - - private: - // Returns a pointer to a static empty string that is used as a placeholder - // to indicate a component should be deleted (see below). - const CHAR* Placeholder() { - static const CHAR empty_string = 0; - return &empty_string; - } - - // We support three states: - // - // Action | Source Component - // -----------------------+-------------------------------------------------- - // Don't change component | NULL (unused) - // Replace component | (replacement string) (replacement component) - // Delete component | (non-NULL) (invalid component: (0,-1)) - // - // We use a pointer to the empty string for the source when the component - // should be deleted. - URLComponentSource sources_; - url_parse::Parsed components_; -}; - -// The base must be an 8-bit canonical URL. -GURL_API bool ReplaceStandardURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool ReplaceStandardURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Filesystem URLs can only have the path, query, or ref replaced. -// All other components will be ignored. -GURL_API bool ReplaceFileSystemURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool ReplaceFileSystemURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Replacing some parts of a file URL is not permitted. Everything except -// the host, path, query, and ref will be ignored. -GURL_API bool ReplaceFileURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool ReplaceFileURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Path URLs can only have the scheme and path replaced. All other components -// will be ignored. -GURL_API bool ReplacePathURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool ReplacePathURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Mailto URLs can only have the scheme, path, and query replaced. -// All other components will be ignored. -GURL_API bool ReplaceMailtoURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); -GURL_API bool ReplaceMailtoURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); - -// Relative URL --------------------------------------------------------------- - -// Given an input URL or URL fragment |fragment|, determines if it is a -// relative or absolute URL and places the result into |*is_relative|. If it is -// relative, the relevant portion of the URL will be placed into -// |*relative_component| (there may have been trimmed whitespace, for example). -// This value is passed to ResolveRelativeURL. If the input is not relative, -// this value is UNDEFINED (it may be changed by the function). -// -// Returns true on success (we successfully determined the URL is relative or -// not). Failure means that the combination of URLs doesn't make any sense. -// -// The base URL should always be canonical, therefore is ASCII. -GURL_API bool IsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const char* fragment, - int fragment_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component); -GURL_API bool IsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const char16* fragment, - int fragment_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component); - -// Given a canonical parsed source URL, a URL fragment known to be relative, -// and the identified relevant portion of the relative URL (computed by -// IsRelativeURL), this produces a new parsed canonical URL in |output| and -// |out_parsed|. -// -// It also requires a flag indicating whether the base URL is a file: URL -// which triggers additional logic. -// -// The base URL should be canonical and have a host (may be empty for file -// URLs) and a path. If it doesn't have these, we can't resolve relative -// URLs off of it and will return the base as the output with an error flag. -// Becausee it is canonical is should also be ASCII. -// -// The query charset converter follows the same rules as CanonicalizeQuery. -// -// Returns true on success. On failure, the output will be "something -// reasonable" that will be consistent and valid, just probably not what -// was intended by the web page author or caller. -GURL_API bool ResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const char* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed); -GURL_API bool ResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const char16* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed); - -} // namespace url_canon - -#endif // GOOGLEURL_SRC_URL_CANON_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_etc.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_etc.cc.svn-base deleted file mode 100644 index 318c906e0..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_etc.cc.svn-base +++ /dev/null @@ -1,392 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Canonicalizers for random bits that aren't big enough for their own files. - -#include - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" - -namespace url_canon { - -namespace { - -// Returns true if the given character should be removed from the middle of a -// URL. -inline bool IsRemovableURLWhitespace(int ch) { - return ch == '\r' || ch == '\n' || ch == '\t'; -} - -// Backend for RemoveURLWhitespace (see declaration in url_canon.h). -// It sucks that we have to do this, since this takes about 13% of the total URL -// canonicalization time. -template -const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len, - CanonOutputT* buffer, - int* output_len) { - // Fast verification that there's nothing that needs removal. This is the 99% - // case, so we want it to be fast and don't care about impacting the speed - // when we do find whitespace. - int found_whitespace = false; - for (int i = 0; i < input_len; i++) { - if (!IsRemovableURLWhitespace(input[i])) - continue; - found_whitespace = true; - break; - } - - if (!found_whitespace) { - // Didn't find any whitespace, we don't need to do anything. We can just - // return the input as the output. - *output_len = input_len; - return input; - } - - // Remove the whitespace into the new buffer and return it. - for (int i = 0; i < input_len; i++) { - if (!IsRemovableURLWhitespace(input[i])) - buffer->push_back(input[i]); - } - *output_len = buffer->length(); - return buffer->data(); -} - -// Contains the canonical version of each possible input letter in the scheme -// (basically, lower-cased). The corresponding entry will be 0 if the letter -// is not allowed in a scheme. -const char kSchemeCanonical[0x80] = { -// 00-1f: all are invalid - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -// ' ' ! " # $ % & ' ( ) * + , - . / - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '+', 0, '-', '.', 0, -// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0 , 0 , 0 , 0 , 0 , 0 , -// @ A B C D E F G H I J K L M N O - 0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', -// P Q R S T U V W X Y Z [ \ ] ^ _ - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0 , 0, 0 , 0, -// ` a b c d e f g h i j k l m n o - 0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', -// p q r s t u v w x y z { | } ~ - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0 , 0 , 0 , 0 , 0 }; - -// This could be a table lookup as well by setting the high bit for each -// valid character, but it's only called once per URL, and it makes the lookup -// table easier to read not having extra stuff in it. -inline bool IsSchemeFirstChar(unsigned char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - -template -bool DoScheme(const CHAR* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme) { - if (scheme.len <= 0) { - // Scheme is unspecified or empty, convert to empty by appending a colon. - *out_scheme = url_parse::Component(output->length(), 0); - output->push_back(':'); - return true; - } - - // The output scheme starts from the current position. - out_scheme->begin = output->length(); - - // Danger: it's important that this code does not strip any characters: it - // only emits the canonical version (be it valid or escaped) of each of - // the input characters. Stripping would put it out of sync with - // url_util::FindAndCompareScheme, which could cause some security checks on - // schemes to be incorrect. - bool success = true; - int end = scheme.end(); - for (int i = scheme.begin; i < end; i++) { - UCHAR ch = static_cast(spec[i]); - char replacement = 0; - if (ch < 0x80) { - if (i == scheme.begin) { - // Need to do a special check for the first letter of the scheme. - if (IsSchemeFirstChar(static_cast(ch))) - replacement = kSchemeCanonical[ch]; - } else { - replacement = kSchemeCanonical[ch]; - } - } - - if (replacement) { - output->push_back(replacement); - } else if (ch == '%') { - // Canonicalizing the scheme multiple times should lead to the same - // result. Since invalid characters will be escaped, we need to preserve - // the percent to avoid multiple escaping. The scheme will be invalid. - success = false; - output->push_back('%'); - } else { - // Invalid character, store it but mark this scheme as invalid. - success = false; - - // This will escape the output and also handle encoding issues. - // Ignore the return value since we already failed. - AppendUTF8EscapedChar(spec, &i, end, output); - } - } - - // The output scheme ends with the the current position, before appending - // the colon. - out_scheme->len = output->length() - out_scheme->begin; - output->push_back(':'); - return success; -} - -// The username and password components reference ranges in the corresponding -// *_spec strings. Typically, these specs will be the same (we're -// canonicalizing a single source string), but may be different when -// replacing components. -template -bool DoUserInfo(const CHAR* username_spec, - const url_parse::Component& username, - const CHAR* password_spec, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password) { - if (username.len <= 0 && password.len <= 0) { - // Common case: no user info. We strip empty username/passwords. - *out_username = url_parse::Component(); - *out_password = url_parse::Component(); - return true; - } - - // Write the username. - out_username->begin = output->length(); - if (username.len > 0) { - // This will escape characters not valid for the username. - AppendStringOfType(&username_spec[username.begin], username.len, - CHAR_USERINFO, output); - } - out_username->len = output->length() - out_username->begin; - - // When there is a password, we need the separator. Note that we strip - // empty but specified passwords. - if (password.len > 0) { - output->push_back(':'); - out_password->begin = output->length(); - AppendStringOfType(&password_spec[password.begin], password.len, - CHAR_USERINFO, output); - out_password->len = output->length() - out_password->begin; - } else { - *out_password = url_parse::Component(); - } - - output->push_back('@'); - return true; -} - -// Helper functions for converting port integers to strings. -inline void WritePortInt(char* output, int output_len, int port) { - _itoa_s(port, output, output_len, 10); -} - -// This function will prepend the colon if there will be a port. -template -bool DoPort(const CHAR* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port) { - int port_num = url_parse::ParsePort(spec, port); - if (port_num == url_parse::PORT_UNSPECIFIED || - port_num == default_port_for_scheme) { - *out_port = url_parse::Component(); - return true; // Leave port empty. - } - - if (port_num == url_parse::PORT_INVALID) { - // Invalid port: We'll copy the text from the input so the user can see - // what the error was, and mark the URL as invalid by returning false. - output->push_back(':'); - out_port->begin = output->length(); - AppendInvalidNarrowString(spec, port.begin, port.end(), output); - out_port->len = output->length() - out_port->begin; - return false; - } - - // Convert port number back to an integer. Max port value is 5 digits, and - // the Parsed::ExtractPort will have made sure the integer is in range. - const int buf_size = 6; - char buf[buf_size]; - WritePortInt(buf, buf_size, port_num); - - // Append the port number to the output, preceeded by a colon. - output->push_back(':'); - out_port->begin = output->length(); - for (int i = 0; i < buf_size && buf[i]; i++) - output->push_back(buf[i]); - - out_port->len = output->length() - out_port->begin; - return true; -} - -template -void DoCanonicalizeRef(const CHAR* spec, - const url_parse::Component& ref, - CanonOutput* output, - url_parse::Component* out_ref) { - if (ref.len < 0) { - // Common case of no ref. - *out_ref = url_parse::Component(); - return; - } - - // Append the ref separator. Note that we need to do this even when the ref - // is empty but present. - output->push_back('#'); - out_ref->begin = output->length(); - - // Now iterate through all the characters, converting to UTF-8 and validating. - int end = ref.end(); - for (int i = ref.begin; i < end; i++) { - if (spec[i] == 0) { - // IE just strips NULLs, so we do too. - continue; - } else if (static_cast(spec[i]) < 0x20) { - // Unline IE seems to, we escape control characters. This will probably - // make the reference fragment unusable on a web page, but people - // shouldn't be using control characters in their anchor names. - AppendEscapedChar(static_cast(spec[i]), output); - } else if (static_cast(spec[i]) < 0x80) { - // Normal ASCII characters are just appended. - output->push_back(static_cast(spec[i])); - } else { - // Non-ASCII characters are appended unescaped, but only when they are - // valid. Invalid Unicode characters are replaced with the "invalid - // character" as IE seems to (ReadUTFChar puts the unicode replacement - // character in the output on failure for us). - unsigned code_point; - ReadUTFChar(spec, &i, end, &code_point); - AppendUTF8Value(code_point, output); - } - } - - out_ref->len = output->length() - out_ref->begin; -} - -} // namespace - -const char* RemoveURLWhitespace(const char* input, int input_len, - CanonOutputT* buffer, - int* output_len) { - return DoRemoveURLWhitespace(input, input_len, buffer, output_len); -} - -const char16* RemoveURLWhitespace(const char16* input, int input_len, - CanonOutputT* buffer, - int* output_len) { - return DoRemoveURLWhitespace(input, input_len, buffer, output_len); -} - -char CanonicalSchemeChar(char16 ch) { - if (ch >= 0x80) - return 0; // Non-ASCII is not supported by schemes. - return kSchemeCanonical[ch]; -} - -bool CanonicalizeScheme(const char* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme) { - return DoScheme(spec, scheme, output, out_scheme); -} - -bool CanonicalizeScheme(const char16* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme) { - return DoScheme(spec, scheme, output, out_scheme); -} - -bool CanonicalizeUserInfo(const char* username_source, - const url_parse::Component& username, - const char* password_source, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password) { - return DoUserInfo( - username_source, username, password_source, password, - output, out_username, out_password); -} - -bool CanonicalizeUserInfo(const char16* username_source, - const url_parse::Component& username, - const char16* password_source, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password) { - return DoUserInfo( - username_source, username, password_source, password, - output, out_username, out_password); -} - -bool CanonicalizePort(const char* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port) { - return DoPort(spec, port, - default_port_for_scheme, - output, out_port); -} - -bool CanonicalizePort(const char16* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port) { - return DoPort(spec, port, default_port_for_scheme, - output, out_port); -} - -void CanonicalizeRef(const char* spec, - const url_parse::Component& ref, - CanonOutput* output, - url_parse::Component* out_ref) { - DoCanonicalizeRef(spec, ref, output, out_ref); -} - -void CanonicalizeRef(const char16* spec, - const url_parse::Component& ref, - CanonOutput* output, - url_parse::Component* out_ref) { - DoCanonicalizeRef(spec, ref, output, out_ref); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_filesystemurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_filesystemurl.cc.svn-base deleted file mode 100644 index 7f792080d..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_filesystemurl.cc.svn-base +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2012, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Functions for canonicalizing "filesystem:file:" URLs. - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_parse_internal.h" -#include "googleurl/src/url_util.h" -#include "googleurl/src/url_util_internal.h" - -namespace url_canon { - -namespace { - -// We use the URLComponentSource for the outer URL, as it can have replacements, -// whereas the inner_url can't, so it uses spec. -template -bool DoCanonicalizeFileSystemURL(const CHAR* spec, - const URLComponentSource& source, - const url_parse::Parsed& parsed, - CharsetConverter* charset_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - // filesystem only uses {scheme, path, query, ref} -- clear the rest. - new_parsed->username = url_parse::Component(); - new_parsed->password = url_parse::Component(); - new_parsed->host = url_parse::Component(); - new_parsed->port = url_parse::Component(); - - const url_parse::Parsed* inner_parsed = parsed.inner_parsed(); - url_parse::Parsed new_inner_parsed; - - // Scheme (known, so we don't bother running it through the more - // complicated scheme canonicalizer). - new_parsed->scheme.begin = output->length(); - output->Append("filesystem:", 11); - new_parsed->scheme.len = 10; - - if (!parsed.inner_parsed() || !parsed.inner_parsed()->scheme.is_valid()) - return false; - - bool success = true; - if (url_util::CompareSchemeComponent(spec, inner_parsed->scheme, - url_util::kFileScheme)) { - new_inner_parsed.scheme.begin = output->length(); - output->Append("file://", 7); - new_inner_parsed.scheme.len = 4; - success &= CanonicalizePath(spec, inner_parsed->path, output, - &new_inner_parsed.path); - } else if (url_util::IsStandard(spec, inner_parsed->scheme)) { - success = - url_canon::CanonicalizeStandardURL(spec, - parsed.inner_parsed()->Length(), - *parsed.inner_parsed(), - charset_converter, output, - &new_inner_parsed); - } else { - // TODO(ericu): The URL is wrong, but should we try to output more of what - // we were given? Echoing back filesystem:mailto etc. doesn't seem all that - // useful. - return false; - } - // The filesystem type must be more than just a leading slash for validity. - success &= parsed.inner_parsed()->path.len > 1; - - success &= CanonicalizePath(source.path, parsed.path, output, - &new_parsed->path); - - // Ignore failures for query/ref since the URL can probably still be loaded. - CanonicalizeQuery(source.query, parsed.query, charset_converter, - output, &new_parsed->query); - CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); - if (success) - new_parsed->set_inner_parsed(new_inner_parsed); - - return success; -} - -} // namespace - -bool CanonicalizeFileSystemURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* charset_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeFileSystemURL( - spec, URLComponentSource(spec), parsed, charset_converter, output, - new_parsed); -} - -bool CanonicalizeFileSystemURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* charset_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeFileSystemURL( - spec, URLComponentSource(spec), parsed, charset_converter, output, - new_parsed); -} - -bool ReplaceFileSystemURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* charset_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupOverrideComponents(base, replacements, &source, &parsed); - return DoCanonicalizeFileSystemURL( - base, source, parsed, charset_converter, output, new_parsed); -} - -bool ReplaceFileSystemURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* charset_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - RawCanonOutput<1024> utf8; - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); - return DoCanonicalizeFileSystemURL( - base, source, parsed, charset_converter, output, new_parsed); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_fileurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_fileurl.cc.svn-base deleted file mode 100644 index 97023ebdc..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_fileurl.cc.svn-base +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Functions for canonicalizing "file:" URLs. - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_parse_internal.h" - -namespace url_canon { - -namespace { - -#ifdef WIN32 - -// Given a pointer into the spec, this copies and canonicalizes the drive -// letter and colon to the output, if one is found. If there is not a drive -// spec, it won't do anything. The index of the next character in the input -// spec is returned (after the colon when a drive spec is found, the begin -// offset if one is not). -template -int FileDoDriveSpec(const CHAR* spec, int begin, int end, - CanonOutput* output) { - // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, - // (with backslashes instead of slashes as well). - int num_slashes = url_parse::CountConsecutiveSlashes(spec, begin, end); - int after_slashes = begin + num_slashes; - - if (!url_parse::DoesBeginWindowsDriveSpec(spec, after_slashes, end)) - return begin; // Haven't consumed any characters - - // A drive spec is the start of a path, so we need to add a slash for the - // authority terminator (typically the third slash). - output->push_back('/'); - - // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid - // and that it is followed by a colon/pipe. - - // Normalize Windows drive letters to uppercase - if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z') - output->push_back(spec[after_slashes] - 'a' + 'A'); - else - output->push_back(static_cast(spec[after_slashes])); - - // Normalize the character following it to a colon rather than pipe. - output->push_back(':'); - return after_slashes + 2; -} - -#endif // WIN32 - -template -bool DoFileCanonicalizePath(const CHAR* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path) { - // Copies and normalizes the "c:" at the beginning, if present. - out_path->begin = output->length(); - int after_drive; -#ifdef WIN32 - after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output); -#else - after_drive = path.begin; -#endif - - // Copies the rest of the path, starting from the slash following the - // drive colon (if any, Windows only), or the first slash of the path. - bool success = true; - if (after_drive < path.end()) { - // Use the regular path canonicalizer to canonicalize the rest of the - // path. Give it a fake output component to write into. DoCanonicalizeFile - // will compute the full path component. - url_parse::Component sub_path = - url_parse::MakeRange(after_drive, path.end()); - url_parse::Component fake_output_path; - success = CanonicalizePath(spec, sub_path, output, &fake_output_path); - } else { - // No input path, canonicalize to a slash. - output->push_back('/'); - } - - out_path->len = output->length() - out_path->begin; - return success; -} - -template -bool DoCanonicalizeFileURL(const URLComponentSource& source, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - // Things we don't set in file: URLs. - new_parsed->username = url_parse::Component(); - new_parsed->password = url_parse::Component(); - new_parsed->port = url_parse::Component(); - - // Scheme (known, so we don't bother running it through the more - // complicated scheme canonicalizer). - new_parsed->scheme.begin = output->length(); - output->Append("file://", 7); - new_parsed->scheme.len = 4; - - // Append the host. For many file URLs, this will be empty. For UNC, this - // will be present. - // TODO(brettw) This doesn't do any checking for host name validity. We - // should probably handle validity checking of UNC hosts differently than - // for regular IP hosts. - bool success = CanonicalizeHost(source.host, parsed.host, - output, &new_parsed->host); - success &= DoFileCanonicalizePath(source.path, parsed.path, - output, &new_parsed->path); - CanonicalizeQuery(source.query, parsed.query, query_converter, - output, &new_parsed->query); - - // Ignore failure for refs since the URL can probably still be loaded. - CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); - - return success; -} - -} // namespace - -bool CanonicalizeFileURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeFileURL( - URLComponentSource(spec), parsed, query_converter, - output, new_parsed); -} - -bool CanonicalizeFileURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeFileURL( - URLComponentSource(spec), parsed, query_converter, - output, new_parsed); -} - -bool FileCanonicalizePath(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path) { - return DoFileCanonicalizePath(spec, path, - output, out_path); -} - -bool FileCanonicalizePath(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path) { - return DoFileCanonicalizePath(spec, path, - output, out_path); -} - -bool ReplaceFileURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupOverrideComponents(base, replacements, &source, &parsed); - return DoCanonicalizeFileURL( - source, parsed, query_converter, output, new_parsed); -} - -bool ReplaceFileURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - RawCanonOutput<1024> utf8; - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); - return DoCanonicalizeFileURL( - source, parsed, query_converter, output, new_parsed); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_host.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_host.cc.svn-base deleted file mode 100644 index 6642004c2..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_host.cc.svn-base +++ /dev/null @@ -1,401 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "base/logging.h" -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" - -namespace url_canon { - -namespace { - -// For reference, here's what IE supports: -// Key: 0 (disallowed: failure if present in the input) -// + (allowed either escaped or unescaped, and unmodified) -// U (allowed escaped or unescaped but always unescaped if present in -// escaped form) -// E (allowed escaped or unescaped but always escaped if present in -// unescaped form) -// % (only allowed escaped in the input, will be unmodified). -// I left blank alpha numeric characters. -// -// 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f -// ----------------------------------------------- -// 0 0 E E E E E E E E E E E E E E E -// 1 E E E E E E E E E E E E E E E E -// 2 E + E E + E + + + + + + + U U 0 -// 3 % % E + E 0 <-- Those are : ; < = > ? -// 4 % -// 5 U 0 U U U <-- Those are [ \ ] ^ _ -// 6 E <-- That's ` -// 7 E E E U E <-- Those are { | } ~ (UNPRINTABLE) -// -// NOTE: I didn't actually test all the control characters. Some may be -// disallowed in the input, but they are all accepted escaped except for 0. -// I also didn't test if characters affecting HTML parsing are allowed -// unescaped, eg. (") or (#), which would indicate the beginning of the path. -// Surprisingly, space is accepted in the input and always escaped. - -// This table lists the canonical version of all characters we allow in the -// input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar -// value to indicate that this character should be escaped. We are a little more -// restrictive than IE, but less restrictive than Firefox. -// -// Note that we disallow the % character. We will allow it when part of an -// escape sequence, of course, but this disallows "%25". Even though IE allows -// it, allowing it would put us in a funny state. If there was an invalid -// escape sequence like "%zz", we'll add "%25zz" to the output and fail. -// Allowing percents means we'll succeed a second time, so validity would change -// based on how many times you run the canonicalizer. We prefer to always report -// the same vailidity, so reject this. -const unsigned char kEsc = 0xff; -const unsigned char kHostCharLookup[0x80] = { -// 00-1f: all are invalid - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -// ' ' ! " # $ % & ' ( ) * + , - . / - kEsc,kEsc,kEsc,kEsc,kEsc, 0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.', 0, -// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0 ,kEsc,kEsc,kEsc, 0 , -// @ A B C D E F G H I J K L M N O - kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', -// P Q R S T U V W X Y Z [ \ ] ^ _ - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', 0 , ']', 0 , '_', -// ` a b c d e f g h i j k l m n o - kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', -// p q r s t u v w x y z { | } ~ - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc, 0 , 0 }; - -const int kTempHostBufferLen = 1024; -typedef RawCanonOutputT StackBuffer; -typedef RawCanonOutputT StackBufferW; - -// Scans a host name and fills in the output flags according to what we find. -// |has_non_ascii| will be true if there are any non-7-bit characters, and -// |has_escaped| will be true if there is a percent sign. -template -void ScanHostname(const CHAR* spec, const url_parse::Component& host, - bool* has_non_ascii, bool* has_escaped) { - int end = host.end(); - *has_non_ascii = false; - *has_escaped = false; - for (int i = host.begin; i < end; i++) { - if (static_cast(spec[i]) >= 0x80) - *has_non_ascii = true; - else if (spec[i] == '%') - *has_escaped = true; - } -} - -// Canonicalizes a host name that is entirely 8-bit characters (even though -// the type holding them may be 16 bits. Escaped characters will be unescaped. -// Non-7-bit characters (for example, UTF-8) will be passed unchanged. -// -// The |*has_non_ascii| flag will be true if there are non-7-bit characters in -// the output. -// -// This function is used in two situations: -// -// * When the caller knows there is no non-ASCII or percent escaped -// characters. This is what DoHost does. The result will be a completely -// canonicalized host since we know nothing weird can happen (escaped -// characters could be unescaped to non-7-bit, so they have to be treated -// with suspicion at this point). It does not use the |has_non_ascii| flag. -// -// * When the caller has an 8-bit string that may need unescaping. -// DoComplexHost calls us this situation to do unescaping and validation. -// After this, it may do other IDN operations depending on the value of the -// |*has_non_ascii| flag. -// -// The return value indicates if the output is a potentially valid host name. -template -bool DoSimpleHost(const INCHAR* host, - int host_len, - CanonOutputT* output, - bool* has_non_ascii) { - *has_non_ascii = false; - - bool success = true; - for (int i = 0; i < host_len; ++i) { - unsigned int source = host[i]; - if (source == '%') { - // Unescape first, if possible. - // Source will be used only if decode operation was successful. - if (!DecodeEscaped(host, &i, host_len, - reinterpret_cast(&source))) { - // Invalid escaped character. There is nothing that can make this - // host valid. We append an escaped percent so the URL looks reasonable - // and mark as failed. - AppendEscapedChar('%', output); - success = false; - continue; - } - } - - if (source < 0x80) { - // We have ASCII input, we can use our lookup table. - unsigned char replacement = kHostCharLookup[source]; - if (!replacement) { - // Invalid character, add it as percent-escaped and mark as failed. - AppendEscapedChar(source, output); - success = false; - } else if (replacement == kEsc) { - // This character is valid but should be escaped. - AppendEscapedChar(source, output); - } else { - // Common case, the given character is valid in a hostname, the lookup - // table tells us the canonical representation of that character (lower - // cased). - output->push_back(replacement); - } - } else { - // It's a non-ascii char. Just push it to the output. - // In case where we have char16 input, and char output it's safe to - // cast char16->char only if input string was converted to ASCII. - output->push_back(static_cast(source)); - *has_non_ascii = true; - } - } - - return success; -} - -// Canonicalizes a host that requires IDN conversion. Returns true on success -bool DoIDNHost(const char16* src, int src_len, CanonOutput* output) { - // We need to escape URL before doing IDN conversion, since punicode strings - // cannot be escaped after they are created. - RawCanonOutputW url_escaped_host; - bool has_non_ascii; - DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii); - - StackBufferW wide_output; - if (!IDNToASCII(url_escaped_host.data(), - url_escaped_host.length(), - &wide_output)) { - // Some error, give up. This will write some reasonable looking - // representation of the string to the output. - AppendInvalidNarrowString(src, 0, src_len, output); - return false; - } - - // Now we check the ASCII output like a normal host. It will also handle - // unescaping. Although we unescaped everything before this function call, if - // somebody does %00 as fullwidth, ICU will convert this to ASCII. - bool success = DoSimpleHost(wide_output.data(), - wide_output.length(), - output, &has_non_ascii); - DCHECK(!has_non_ascii); - return success; -} - -// 8-bit convert host to its ASCII version: this converts the UTF-8 input to -// UTF-16. The has_escaped flag should be set if the input string requires -// unescaping. -bool DoComplexHost(const char* host, int host_len, - bool has_non_ascii, bool has_escaped, CanonOutput* output) { - // Save the current position in the output. We may write stuff and rewind it - // below, so we need to know where to rewind to. - int begin_length = output->length(); - - // Points to the UTF-8 data we want to convert. This will either be the - // input or the unescaped version written to |*output| if necessary. - const char* utf8_source; - int utf8_source_len; - if (has_escaped) { - // Unescape before converting to UTF-16 for IDN. We write this into the - // output because it most likely does not require IDNization, and we can - // save another huge stack buffer. It will be replaced below if it requires - // IDN. This will also update our non-ASCII flag so we know whether the - // unescaped input requires IDN. - if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) { - // Error with some escape sequence. We'll call the current output - // complete. DoSimpleHost will have written some "reasonable" output. - return false; - } - - // Unescaping may have left us with ASCII input, in which case the - // unescaped version we wrote to output is complete. - if (!has_non_ascii) { - return true; - } - - // Save the pointer into the data was just converted (it may be appended to - // other data in the output buffer). - utf8_source = &output->data()[begin_length]; - utf8_source_len = output->length() - begin_length; - } else { - // We don't need to unescape, use input for IDNization later. (We know the - // input has non-ASCII, or the simple version would have been called - // instead of us.) - utf8_source = host; - utf8_source_len = host_len; - } - - // Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion. - // Above, we may have used the output to write the unescaped values to, so - // we have to rewind it to where we started after we convert it to UTF-16. - StackBufferW utf16; - if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) { - // In this error case, the input may or may not be the output. - StackBuffer utf8; - for (int i = 0; i < utf8_source_len; i++) - utf8.push_back(utf8_source[i]); - output->set_length(begin_length); - AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output); - return false; - } - output->set_length(begin_length); - - // This will call DoSimpleHost which will do normal ASCII canonicalization - // and also check for IP addresses in the outpt. - return DoIDNHost(utf16.data(), utf16.length(), output); -} - -// UTF-16 convert host to its ASCII version. The set up is already ready for -// the backend, so we just pass through. The has_escaped flag should be set if -// the input string requires unescaping. -bool DoComplexHost(const char16* host, int host_len, - bool has_non_ascii, bool has_escaped, CanonOutput* output) { - if (has_escaped) { - // Yikes, we have escaped characters with wide input. The escaped - // characters should be interpreted as UTF-8. To solve this problem, - // we convert to UTF-8, unescape, then convert back to UTF-16 for IDN. - // - // We don't bother to optimize the conversion in the ASCII case (which - // *could* just be a copy) and use the UTF-8 path, because it should be - // very rare that host names have escaped characters, and it is relatively - // fast to do the conversion anyway. - StackBuffer utf8; - if (!ConvertUTF16ToUTF8(host, host_len, &utf8)) { - AppendInvalidNarrowString(host, 0, host_len, output); - return false; - } - - // Once we convert to UTF-8, we can use the 8-bit version of the complex - // host handling code above. - return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii, - has_escaped, output); - } - - // No unescaping necessary, we can safely pass the input to ICU. This - // function will only get called if we either have escaped or non-ascii - // input, so it's safe to just use ICU now. Even if the input is ASCII, - // this function will do the right thing (just slower than we could). - return DoIDNHost(host, host_len, output); -} - -template -void DoHost(const CHAR* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info) { - if (host.len <= 0) { - // Empty hosts don't need anything. - host_info->family = CanonHostInfo::NEUTRAL; - host_info->out_host = url_parse::Component(); - return; - } - - bool has_non_ascii, has_escaped; - ScanHostname(spec, host, &has_non_ascii, &has_escaped); - - // Keep track of output's initial length, so we can rewind later. - const int output_begin = output->length(); - - bool success; - if (!has_non_ascii && !has_escaped) { - success = DoSimpleHost(&spec[host.begin], host.len, - output, &has_non_ascii); - DCHECK(!has_non_ascii); - } else { - success = DoComplexHost(&spec[host.begin], host.len, - has_non_ascii, has_escaped, output); - } - - if (!success) { - // Canonicalization failed. Set BROKEN to notify the caller. - host_info->family = CanonHostInfo::BROKEN; - } else { - // After all the other canonicalization, check if we ended up with an IP - // address. IP addresses are small, so writing into this temporary buffer - // should not cause an allocation. - RawCanonOutput<64> canon_ip; - CanonicalizeIPAddress(output->data(), - url_parse::MakeRange(output_begin, output->length()), - &canon_ip, host_info); - - // If we got an IPv4/IPv6 address, copy the canonical form back to the - // real buffer. Otherwise, it's a hostname or broken IP, in which case - // we just leave it in place. - if (host_info->IsIPAddress()) { - output->set_length(output_begin); - output->Append(canon_ip.data(), canon_ip.length()); - } - } - - host_info->out_host = url_parse::MakeRange(output_begin, output->length()); -} - -} // namespace - -bool CanonicalizeHost(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - url_parse::Component* out_host) { - CanonHostInfo host_info; - DoHost(spec, host, output, &host_info); - *out_host = host_info.out_host; - return (host_info.family != CanonHostInfo::BROKEN); -} - -bool CanonicalizeHost(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - url_parse::Component* out_host) { - CanonHostInfo host_info; - DoHost(spec, host, output, &host_info); - *out_host = host_info.out_host; - return (host_info.family != CanonHostInfo::BROKEN); -} - -void CanonicalizeHostVerbose(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo *host_info) { - DoHost(spec, host, output, host_info); -} - -void CanonicalizeHostVerbose(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo *host_info) { - DoHost(spec, host, output, host_info); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.cc.svn-base deleted file mode 100644 index eaae64345..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.cc.svn-base +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// ICU integration functions. - -#include -#include -#include -#include -#include - -#include "googleurl/src/url_canon_icu.h" -#include "googleurl/src/url_canon_internal.h" // for _itoa_s - -#include "base/logging.h" - -namespace url_canon { - -namespace { - -// Called when converting a character that can not be represented, this will -// append an escaped version of the numerical character reference for that code -// point. It is of the form "Ӓ" and we will escape the non-digits to -// "%26%231234%3B". Why? This is what Netscape did back in the olden days. -void appendURLEscapedChar(const void* context, - UConverterFromUnicodeArgs* from_args, - const UChar* code_units, - int32_t length, - UChar32 code_point, - UConverterCallbackReason reason, - UErrorCode* err) { - if (reason == UCNV_UNASSIGNED) { - *err = U_ZERO_ERROR; - - const static int prefix_len = 6; - const static char prefix[prefix_len + 1] = "%26%23"; // "&#" percent-escaped - ucnv_cbFromUWriteBytes(from_args, prefix, prefix_len, 0, err); - - DCHECK(code_point < 0x110000); - char number[8]; // Max Unicode code point is 7 digits. - _itoa_s(code_point, number, 10); - int number_len = static_cast(strlen(number)); - ucnv_cbFromUWriteBytes(from_args, number, number_len, 0, err); - - const static int postfix_len = 3; - const static char postfix[postfix_len + 1] = "%3B"; // ";" percent-escaped - ucnv_cbFromUWriteBytes(from_args, postfix, postfix_len, 0, err); - } -} - -// A class for scoping the installation of the invalid character callback. -class AppendHandlerInstaller { - public: - // The owner of this object must ensure that the converter is alive for the - // duration of this object's lifetime. - AppendHandlerInstaller(UConverter* converter) : converter_(converter) { - UErrorCode err = U_ZERO_ERROR; - ucnv_setFromUCallBack(converter_, appendURLEscapedChar, 0, - &old_callback_, &old_context_, &err); - } - - ~AppendHandlerInstaller() { - UErrorCode err = U_ZERO_ERROR; - ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err); - } - - private: - UConverter* converter_; - - UConverterFromUCallback old_callback_; - const void* old_context_; -}; - -} // namespace - -ICUCharsetConverter::ICUCharsetConverter(UConverter* converter) - : converter_(converter) { -} - -ICUCharsetConverter::~ICUCharsetConverter() { -} - -void ICUCharsetConverter::ConvertFromUTF16(const char16* input, - int input_len, - CanonOutput* output) { - // Install our error handler. It will be called for character that can not - // be represented in the destination character set. - AppendHandlerInstaller handler(converter_); - - int begin_offset = output->length(); - int dest_capacity = output->capacity() - begin_offset; - output->set_length(output->length()); - - do { - UErrorCode err = U_ZERO_ERROR; - char* dest = &output->data()[begin_offset]; - int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity, - input, input_len, &err); - if (err != U_BUFFER_OVERFLOW_ERROR) { - output->set_length(begin_offset + required_capacity); - return; - } - - // Output didn't fit, expand - dest_capacity = required_capacity; - output->Resize(begin_offset + dest_capacity); - } while (true); -} - -// Converts the Unicode input representing a hostname to ASCII using IDN rules. -// The output must be ASCII, but is represented as wide characters. -// -// On success, the output will be filled with the ASCII host name and it will -// return true. Unlike most other canonicalization functions, this assumes that -// the output is empty. The beginning of the host will be at offset 0, and -// the length of the output will be set to the length of the new host name. -// -// On error, this will return false. The output in this case is undefined. -bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output) { - DCHECK(output->length() == 0); // Output buffer is assumed empty. - while (true) { - // Use ALLOW_UNASSIGNED to be more tolerant of hostnames that violate - // the spec (which do exist). This does not present any risk and is a - // little more future proof. - UErrorCode err = U_ZERO_ERROR; - int num_converted = uidna_IDNToASCII(src, src_len, output->data(), - output->capacity(), - UIDNA_ALLOW_UNASSIGNED, NULL, &err); - if (err == U_ZERO_ERROR) { - output->set_length(num_converted); - return true; - } - if (err != U_BUFFER_OVERFLOW_ERROR) - return false; // Unknown error, give up. - - // Not enough room in our buffer, expand. - output->Resize(output->capacity() * 2); - } -} - -bool ReadUTFChar(const char* str, int* begin, int length, - unsigned* code_point_out) { - int code_point; // Avoids warning when U8_NEXT writes -1 to it. - U8_NEXT(str, *begin, length, code_point); - *code_point_out = static_cast(code_point); - - // The ICU macro above moves to the next char, we want to point to the last - // char consumed. - (*begin)--; - - // Validate the decoded value. - if (U_IS_UNICODE_CHAR(code_point)) - return true; - *code_point_out = kUnicodeReplacementCharacter; - return false; -} - -bool ReadUTFChar(const char16* str, int* begin, int length, - unsigned* code_point) { - if (U16_IS_SURROGATE(str[*begin])) { - if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || - !U16_IS_TRAIL(str[*begin + 1])) { - // Invalid surrogate pair. - *code_point = kUnicodeReplacementCharacter; - return false; - } else { - // Valid surrogate pair. - *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]); - (*begin)++; - } - } else { - // Not a surrogate, just one 16-bit word. - *code_point = str[*begin]; - } - - if (U_IS_UNICODE_CHAR(*code_point)) - return true; - - // Invalid code point. - *code_point = kUnicodeReplacementCharacter; - return false; -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.h.svn-base deleted file mode 100644 index e529fcbd9..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_icu.h.svn-base +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// ICU integration functions. - -#ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__ -#define GOOGLEURL_SRC_URL_CANON_ICU_H__ - -#include "googleurl/src/url_canon.h" - -typedef struct UConverter UConverter; - -namespace url_canon { - -// An implementation of CharsetConverter that implementations can use to -// interface the canonicalizer with ICU's conversion routines. -class ICUCharsetConverter : public CharsetConverter { - public: - // Constructs a converter using an already-existing ICU character set - // converter. This converter is NOT owned by this object; the lifetime must - // be managed by the creator such that it is alive as long as this is. - GURL_API ICUCharsetConverter(UConverter* converter); - - GURL_API virtual ~ICUCharsetConverter(); - - GURL_API virtual void ConvertFromUTF16(const char16* input, - int input_len, - CanonOutput* output); - - private: - // The ICU converter, not owned by this class. - UConverter* converter_; -}; - -} // namespace url_canon - -#endif // GOOGLEURL_SRC_URL_CANON_ICU_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.cc.svn-base deleted file mode 100644 index cd791bb35..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.cc.svn-base +++ /dev/null @@ -1,427 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include -#include - -#include "googleurl/src/url_canon_internal.h" - -namespace url_canon { - -namespace { - -template -void DoAppendStringOfType(const CHAR* source, int length, - SharedCharTypes type, - CanonOutput* output) { - for (int i = 0; i < length; i++) { - if (static_cast(source[i]) >= 0x80) { - // ReadChar will fill the code point with kUnicodeReplacementCharacter - // when the input is invalid, which is what we want. - unsigned code_point; - ReadUTFChar(source, &i, length, &code_point); - AppendUTF8EscapedValue(code_point, output); - } else { - // Just append the 7-bit character, possibly escaping it. - unsigned char uch = static_cast(source[i]); - if (!IsCharOfType(uch, type)) - AppendEscapedChar(uch, output); - else - output->push_back(uch); - } - } -} - -// This function assumes the input values are all contained in 8-bit, -// although it allows any type. Returns true if input is valid, false if not. -template -void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end, - CanonOutput* output) { - for (int i = begin; i < end; i++) { - UCHAR uch = static_cast(spec[i]); - if (uch >= 0x80) { - // Handle UTF-8/16 encodings. This call will correctly handle the error - // case by appending the invalid character. - AppendUTF8EscapedChar(spec, &i, end, output); - } else if (uch <= ' ' || uch == 0x7f) { - // This function is for error handling, so we escape all control - // characters and spaces, but not anything else since we lack - // context to do something more specific. - AppendEscapedChar(static_cast(uch), output); - } else { - output->push_back(static_cast(uch)); - } - } -} - -// Overrides one component, see the url_canon::Replacements structure for -// what the various combionations of source pointer and component mean. -void DoOverrideComponent(const char* override_source, - const url_parse::Component& override_component, - const char** dest, - url_parse::Component* dest_component) { - if (override_source) { - *dest = override_source; - *dest_component = override_component; - } -} - -// Similar to DoOverrideComponent except that it takes a UTF-16 input and does -// not actually set the output character pointer. -// -// The input is converted to UTF-8 at the end of the given buffer as a temporary -// holding place. The component indentifying the portion of the buffer used in -// the |utf8_buffer| will be specified in |*dest_component|. -// -// This will not actually set any |dest| pointer like DoOverrideComponent -// does because all of the pointers will point into the |utf8_buffer|, which -// may get resized while we're overriding a subsequent component. Instead, the -// caller should use the beginning of the |utf8_buffer| as the string pointer -// for all components once all overrides have been prepared. -bool PrepareUTF16OverrideComponent( - const char16* override_source, - const url_parse::Component& override_component, - CanonOutput* utf8_buffer, - url_parse::Component* dest_component) { - bool success = true; - if (override_source) { - if (!override_component.is_valid()) { - // Non-"valid" component (means delete), so we need to preserve that. - *dest_component = url_parse::Component(); - } else { - // Convert to UTF-8. - dest_component->begin = utf8_buffer->length(); - success = ConvertUTF16ToUTF8(&override_source[override_component.begin], - override_component.len, utf8_buffer); - dest_component->len = utf8_buffer->length() - dest_component->begin; - } - } - return success; -} - -} // namespace - -// See the header file for this array's declaration. -const unsigned char kSharedCharTypeTable[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1f - 0, // 0x20 ' ' (escape spaces in queries) - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x21 ! - 0, // 0x22 " - 0, // 0x23 # (invalid in query since it marks the ref) - CHAR_QUERY | CHAR_USERINFO, // 0x24 $ - CHAR_QUERY | CHAR_USERINFO, // 0x25 % - CHAR_QUERY | CHAR_USERINFO, // 0x26 & - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x27 ' - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x28 ( - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x29 ) - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x2a * - CHAR_QUERY | CHAR_USERINFO, // 0x2b + - CHAR_QUERY | CHAR_USERINFO, // 0x2c , - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x2d - - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x2e . - CHAR_QUERY, // 0x2f / - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x30 0 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x31 1 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x32 2 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x33 3 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x34 4 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x35 5 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x36 6 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT, // 0x37 7 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT, // 0x38 8 - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT, // 0x39 9 - CHAR_QUERY, // 0x3a : - CHAR_QUERY, // 0x3b ; - 0, // 0x3c < (Try to prevent certain types of XSS.) - CHAR_QUERY, // 0x3d = - 0, // 0x3e > (Try to prevent certain types of XSS.) - CHAR_QUERY, // 0x3f ? - CHAR_QUERY, // 0x40 @ - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x41 A - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x42 B - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x43 C - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x44 D - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x45 E - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x46 F - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x47 G - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x48 H - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x49 I - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x4a J - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x4b K - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x4c L - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x4d M - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x4e N - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x4f O - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x50 P - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x51 Q - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x52 R - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x53 S - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x54 T - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x55 U - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x56 V - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x57 W - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58 X - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x59 Y - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x5a Z - CHAR_QUERY, // 0x5b [ - CHAR_QUERY, // 0x5c '\' - CHAR_QUERY, // 0x5d ] - CHAR_QUERY, // 0x5e ^ - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x5f _ - CHAR_QUERY, // 0x60 ` - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x61 a - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x62 b - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x63 c - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x64 d - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x65 e - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT, // 0x66 f - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x67 g - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x68 h - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x69 i - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x6a j - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x6b k - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x6c l - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x6d m - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x6e n - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x6f o - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x70 p - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x71 q - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x72 r - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x73 s - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x74 t - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x75 u - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x76 v - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x77 w - CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x78 x - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x79 y - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x7a z - CHAR_QUERY, // 0x7b { - CHAR_QUERY, // 0x7c | - CHAR_QUERY, // 0x7d } - CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT, // 0x7e ~ - 0, // 0x7f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0 - 0xaf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0 - 0xbf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0 - 0xcf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0 - 0xdf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0 - 0xef - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff -}; - -const char kHexCharLookup[0x10] = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', -}; - -const char kCharToHexLookup[8] = { - 0, // 0x00 - 0x1f - '0', // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39 - 'A' - 10, // 0x40 - 0x5f: letters A - F are 0x41 - 0x46 - 'a' - 10, // 0x60 - 0x7f: letters a - f are 0x61 - 0x66 - 0, // 0x80 - 0x9F - 0, // 0xA0 - 0xBF - 0, // 0xC0 - 0xDF - 0, // 0xE0 - 0xFF -}; - -const char16 kUnicodeReplacementCharacter = 0xfffd; - -void AppendStringOfType(const char* source, int length, - SharedCharTypes type, - CanonOutput* output) { - DoAppendStringOfType(source, length, type, output); -} - -void AppendStringOfType(const char16* source, int length, - SharedCharTypes type, - CanonOutput* output) { - DoAppendStringOfType(source, length, type, output); -} - -void AppendInvalidNarrowString(const char* spec, int begin, int end, - CanonOutput* output) { - DoAppendInvalidNarrowString(spec, begin, end, output); -} - -void AppendInvalidNarrowString(const char16* spec, int begin, int end, - CanonOutput* output) { - DoAppendInvalidNarrowString(spec, begin, end, output); -} - -bool ConvertUTF16ToUTF8(const char16* input, int input_len, - CanonOutput* output) { - bool success = true; - for (int i = 0; i < input_len; i++) { - unsigned code_point; - success &= ReadUTFChar(input, &i, input_len, &code_point); - AppendUTF8Value(code_point, output); - } - return success; -} - -bool ConvertUTF8ToUTF16(const char* input, int input_len, - CanonOutputT* output) { - bool success = true; - for (int i = 0; i < input_len; i++) { - unsigned code_point; - success &= ReadUTFChar(input, &i, input_len, &code_point); - AppendUTF16Value(code_point, output); - } - return success; -} - -void SetupOverrideComponents(const char* base, - const Replacements& repl, - URLComponentSource* source, - url_parse::Parsed* parsed) { - // Get the source and parsed structures of the things we are replacing. - const URLComponentSource& repl_source = repl.sources(); - const url_parse::Parsed& repl_parsed = repl.components(); - - DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, - &source->scheme, &parsed->scheme); - DoOverrideComponent(repl_source.username, repl_parsed.username, - &source->username, &parsed->username); - DoOverrideComponent(repl_source.password, repl_parsed.password, - &source->password, &parsed->password); - - // Our host should be empty if not present, so override the default setup. - DoOverrideComponent(repl_source.host, repl_parsed.host, - &source->host, &parsed->host); - if (parsed->host.len == -1) - parsed->host.len = 0; - - DoOverrideComponent(repl_source.port, repl_parsed.port, - &source->port, &parsed->port); - DoOverrideComponent(repl_source.path, repl_parsed.path, - &source->path, &parsed->path); - DoOverrideComponent(repl_source.query, repl_parsed.query, - &source->query, &parsed->query); - DoOverrideComponent(repl_source.ref, repl_parsed.ref, - &source->ref, &parsed->ref); -} - -bool SetupUTF16OverrideComponents(const char* base, - const Replacements& repl, - CanonOutput* utf8_buffer, - URLComponentSource* source, - url_parse::Parsed* parsed) { - bool success = true; - - // Get the source and parsed structures of the things we are replacing. - const URLComponentSource& repl_source = repl.sources(); - const url_parse::Parsed& repl_parsed = repl.components(); - - success &= PrepareUTF16OverrideComponent( - repl_source.scheme, repl_parsed.scheme, - utf8_buffer, &parsed->scheme); - success &= PrepareUTF16OverrideComponent( - repl_source.username, repl_parsed.username, - utf8_buffer, &parsed->username); - success &= PrepareUTF16OverrideComponent( - repl_source.password, repl_parsed.password, - utf8_buffer, &parsed->password); - success &= PrepareUTF16OverrideComponent( - repl_source.host, repl_parsed.host, - utf8_buffer, &parsed->host); - success &= PrepareUTF16OverrideComponent( - repl_source.port, repl_parsed.port, - utf8_buffer, &parsed->port); - success &= PrepareUTF16OverrideComponent( - repl_source.path, repl_parsed.path, - utf8_buffer, &parsed->path); - success &= PrepareUTF16OverrideComponent( - repl_source.query, repl_parsed.query, - utf8_buffer, &parsed->query); - success &= PrepareUTF16OverrideComponent( - repl_source.ref, repl_parsed.ref, - utf8_buffer, &parsed->ref); - - // PrepareUTF16OverrideComponent will not have set the data pointer since the - // buffer could be resized, invalidating the pointers. We set the data - // pointers for affected components now that the buffer is finalized. - if (repl_source.scheme) source->scheme = utf8_buffer->data(); - if (repl_source.username) source->username = utf8_buffer->data(); - if (repl_source.password) source->password = utf8_buffer->data(); - if (repl_source.host) source->host = utf8_buffer->data(); - if (repl_source.port) source->port = utf8_buffer->data(); - if (repl_source.path) source->path = utf8_buffer->data(); - if (repl_source.query) source->query = utf8_buffer->data(); - if (repl_source.ref) source->ref = utf8_buffer->data(); - - return success; -} - -#ifndef WIN32 - -int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) { - const char* format_str; - if (radix == 10) - format_str = "%d"; - else if (radix == 16) - format_str = "%x"; - else - return EINVAL; - - int written = snprintf(buffer, size_in_chars, format_str, value); - if (static_cast(written) >= size_in_chars) { - // Output was truncated, or written was negative. - return EINVAL; - } - return 0; -} - -int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix) { - if (radix != 10) - return EINVAL; - - // No more than 12 characters will be required for a 32-bit integer. - // Add an extra byte for the terminating null. - char temp[13]; - int written = snprintf(temp, sizeof(temp), "%d", value); - if (static_cast(written) >= size_in_chars) { - // Output was truncated, or written was negative. - return EINVAL; - } - - for (int i = 0; i < written; ++i) { - buffer[i] = static_cast(temp[i]); - } - buffer[written] = '\0'; - return 0; -} - -#endif // !WIN32 - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.h.svn-base deleted file mode 100644 index 9165398fa..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal.h.svn-base +++ /dev/null @@ -1,461 +0,0 @@ -// Copyright 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// This file is intended to be included in another C++ file where the character -// types are defined. This allows us to write mostly generic code, but not have -// templace bloat because everything is inlined when anybody calls any of our -// functions. - -#ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ -#define GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ - -#include - -#include "base/logging.h" -#include "googleurl/src/url_canon.h" - -namespace url_canon { - -// Character type handling ----------------------------------------------------- - -// Bits that identify different character types. These types identify different -// bits that are set for each 8-bit character in the kSharedCharTypeTable. -enum SharedCharTypes { - // Characters that do not require escaping in queries. Characters that do - // not have this flag will be escaped; see url_canon_query.cc - CHAR_QUERY = 1, - - // Valid in the username/password field. - CHAR_USERINFO = 2, - - // Valid in a IPv4 address (digits plus dot and 'x' for hex). - CHAR_IPV4 = 4, - - // Valid in an ASCII-representation of a hex digit (as in %-escaped). - CHAR_HEX = 8, - - // Valid in an ASCII-representation of a decimal digit. - CHAR_DEC = 16, - - // Valid in an ASCII-representation of an octal digit. - CHAR_OCT = 32, - - // Characters that do not require escaping in encodeURIComponent. Characters - // that do not have this flag will be escaped; see url_util.cc. - CHAR_COMPONENT = 64, -}; - -// This table contains the flags in SharedCharTypes for each 8-bit character. -// Some canonicalization functions have their own specialized lookup table. -// For those with simple requirements, we have collected the flags in one -// place so there are fewer lookup tables to load into the CPU cache. -// -// Using an unsigned char type has a small but measurable performance benefit -// over using a 32-bit number. -extern const unsigned char kSharedCharTypeTable[0x100]; - -// More readable wrappers around the character type lookup table. -inline bool IsCharOfType(unsigned char c, SharedCharTypes type) { - return !!(kSharedCharTypeTable[c] & type); -} -inline bool IsQueryChar(unsigned char c) { - return IsCharOfType(c, CHAR_QUERY); -} -inline bool IsIPv4Char(unsigned char c) { - return IsCharOfType(c, CHAR_IPV4); -} -inline bool IsHexChar(unsigned char c) { - return IsCharOfType(c, CHAR_HEX); -} -inline bool IsComponentChar(unsigned char c) { - return IsCharOfType(c, CHAR_COMPONENT); -} - -// Appends the given string to the output, escaping characters that do not -// match the given |type| in SharedCharTypes. -void AppendStringOfType(const char* source, int length, - SharedCharTypes type, - CanonOutput* output); -void AppendStringOfType(const char16* source, int length, - SharedCharTypes type, - CanonOutput* output); - -// Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit -// that will be used to represent it. -GURL_API extern const char kHexCharLookup[0x10]; - -// This lookup table allows fast conversion between ASCII hex letters and their -// corresponding numerical value. The 8-bit range is divided up into 8 -// regions of 0x20 characters each. Each of the three character types (numbers, -// uppercase, lowercase) falls into different regions of this range. The table -// contains the amount to subtract from characters in that range to get at -// the corresponding numerical value. -// -// See HexDigitToValue for the lookup. -extern const char kCharToHexLookup[8]; - -// Assumes the input is a valid hex digit! Call IsHexChar before using this. -inline unsigned char HexCharToValue(unsigned char c) { - return c - kCharToHexLookup[c / 0x20]; -} - -// Indicates if the given character is a dot or dot equivalent, returning the -// number of characters taken by it. This will be one for a literal dot, 3 for -// an escaped dot. If the character is not a dot, this will return 0. -template -inline int IsDot(const CHAR* spec, int offset, int end) { - if (spec[offset] == '.') { - return 1; - } else if (spec[offset] == '%' && offset + 3 <= end && - spec[offset + 1] == '2' && - (spec[offset + 2] == 'e' || spec[offset + 2] == 'E')) { - // Found "%2e" - return 3; - } - return 0; -} - -// Returns the canonicalized version of the input character according to scheme -// rules. This is implemented alongside the scheme canonicalizer, and is -// required for relative URL resolving to test for scheme equality. -// -// Returns 0 if the input character is not a valid scheme character. -char CanonicalSchemeChar(char16 ch); - -// Write a single character, escaped, to the output. This always escapes: it -// does no checking that thee character requires escaping. -// Escaping makes sense only 8 bit chars, so code works in all cases of -// input parameters (8/16bit). -template -inline void AppendEscapedChar(UINCHAR ch, - CanonOutputT* output) { - output->push_back('%'); - output->push_back(kHexCharLookup[(ch >> 4) & 0xf]); - output->push_back(kHexCharLookup[ch & 0xf]); -} - -// The character we'll substitute for undecodable or invalid characters. -extern const char16 kUnicodeReplacementCharacter; - -// UTF-8 functions ------------------------------------------------------------ - -// Reads one character in UTF-8 starting at |*begin| in |str| and places -// the decoded value into |*code_point|. If the character is valid, we will -// return true. If invalid, we'll return false and put the -// kUnicodeReplacementCharacter into |*code_point|. -// -// |*begin| will be updated to point to the last character consumed so it -// can be incremented in a loop and will be ready for the next character. -// (for a single-byte ASCII character, it will not be changed). -// -// Implementation is in url_canon_icu.cc. -GURL_API bool ReadUTFChar(const char* str, int* begin, int length, - unsigned* code_point_out); - -// Generic To-UTF-8 converter. This will call the given append method for each -// character that should be appended, with the given output method. Wrappers -// are provided below for escaped and non-escaped versions of this. -// -// The char_value must have already been checked that it's a valid Unicode -// character. -template -inline void DoAppendUTF8(unsigned char_value, Output* output) { - if (char_value <= 0x7f) { - Appender(static_cast(char_value), output); - } else if (char_value <= 0x7ff) { - // 110xxxxx 10xxxxxx - Appender(static_cast(0xC0 | (char_value >> 6)), - output); - Appender(static_cast(0x80 | (char_value & 0x3f)), - output); - } else if (char_value <= 0xffff) { - // 1110xxxx 10xxxxxx 10xxxxxx - Appender(static_cast(0xe0 | (char_value >> 12)), - output); - Appender(static_cast(0x80 | ((char_value >> 6) & 0x3f)), - output); - Appender(static_cast(0x80 | (char_value & 0x3f)), - output); - } else if (char_value <= 0x10FFFF) { // Max unicode code point. - // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - Appender(static_cast(0xf0 | (char_value >> 18)), - output); - Appender(static_cast(0x80 | ((char_value >> 12) & 0x3f)), - output); - Appender(static_cast(0x80 | ((char_value >> 6) & 0x3f)), - output); - Appender(static_cast(0x80 | (char_value & 0x3f)), - output); - } else { - // Invalid UTF-8 character (>20 bits). - NOTREACHED(); - } -} - -// Helper used by AppendUTF8Value below. We use an unsigned parameter so there -// are no funny sign problems with the input, but then have to convert it to -// a regular char for appending. -inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) { - output->push_back(static_cast(ch)); -} - -// Writes the given character to the output as UTF-8. This does NO checking -// of the validity of the unicode characters; the caller should ensure that -// the value it is appending is valid to append. -inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) { - DoAppendUTF8(char_value, output); -} - -// Writes the given character to the output as UTF-8, escaping ALL -// characters (even when they are ASCII). This does NO checking of the -// validity of the unicode characters; the caller should ensure that the value -// it is appending is valid to append. -inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) { - DoAppendUTF8(char_value, output); -} - -// UTF-16 functions ----------------------------------------------------------- - -// Reads one character in UTF-16 starting at |*begin| in |str| and places -// the decoded value into |*code_point|. If the character is valid, we will -// return true. If invalid, we'll return false and put the -// kUnicodeReplacementCharacter into |*code_point|. -// -// |*begin| will be updated to point to the last character consumed so it -// can be incremented in a loop and will be ready for the next character. -// (for a single-16-bit-word character, it will not be changed). -// -// Implementation is in url_canon_icu.cc. -GURL_API bool ReadUTFChar(const char16* str, int* begin, int length, - unsigned* code_point); - -// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. -inline void AppendUTF16Value(unsigned code_point, - CanonOutputT* output) { - if (code_point > 0xffff) { - output->push_back(static_cast((code_point >> 10) + 0xd7c0)); - output->push_back(static_cast((code_point & 0x3ff) | 0xdc00)); - } else { - output->push_back(static_cast(code_point)); - } -} - -// Escaping functions --------------------------------------------------------- - -// Writes the given character to the output as UTF-8, escaped. Call this -// function only when the input is wide. Returns true on success. Failure -// means there was some problem with the encoding, we'll still try to -// update the |*begin| pointer and add a placeholder character to the -// output so processing can continue. -// -// We will append the character starting at ch[begin] with the buffer ch -// being |length|. |*begin| will be updated to point to the last character -// consumed (we may consume more than one for UTF-16) so that if called in -// a loop, incrementing the pointer will move to the next character. -// -// Every single output character will be escaped. This means that if you -// give it an ASCII character as input, it will be escaped. Some code uses -// this when it knows that a character is invalid according to its rules -// for validity. If you don't want escaping for ASCII characters, you will -// have to filter them out prior to calling this function. -// -// Assumes that ch[begin] is within range in the array, but does not assume -// that any following characters are. -inline bool AppendUTF8EscapedChar(const char16* str, int* begin, int length, - CanonOutput* output) { - // UTF-16 input. Readchar16 will handle invalid characters for us and give - // us the kUnicodeReplacementCharacter, so we don't have to do special - // checking after failure, just pass through the failure to the caller. - unsigned char_value; - bool success = ReadUTFChar(str, begin, length, &char_value); - AppendUTF8EscapedValue(char_value, output); - return success; -} - -// Handles UTF-8 input. See the wide version above for usage. -inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length, - CanonOutput* output) { - // ReadUTF8Char will handle invalid characters for us and give us the - // kUnicodeReplacementCharacter, so we don't have to do special checking - // after failure, just pass through the failure to the caller. - unsigned ch; - bool success = ReadUTFChar(str, begin, length, &ch); - AppendUTF8EscapedValue(ch, output); - return success; -} - -// Given a '%' character at |*begin| in the string |spec|, this will decode -// the escaped value and put it into |*unescaped_value| on success (returns -// true). On failure, this will return false, and will not write into -// |*unescaped_value|. -// -// |*begin| will be updated to point to the last character of the escape -// sequence so that when called with the index of a for loop, the next time -// through it will point to the next character to be considered. On failure, -// |*begin| will be unchanged. -inline bool Is8BitChar(char c) { - return true; // this case is specialized to avoid a warning -} -inline bool Is8BitChar(char16 c) { - return c <= 255; -} - -template -inline bool DecodeEscaped(const CHAR* spec, int* begin, int end, - unsigned char* unescaped_value) { - if (*begin + 3 > end || - !Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) { - // Invalid escape sequence because there's not enough room, or the - // digits are not ASCII. - return false; - } - - unsigned char first = static_cast(spec[*begin + 1]); - unsigned char second = static_cast(spec[*begin + 2]); - if (!IsHexChar(first) || !IsHexChar(second)) { - // Invalid hex digits, fail. - return false; - } - - // Valid escape sequence. - *unescaped_value = (HexCharToValue(first) << 4) + HexCharToValue(second); - *begin += 2; - return true; -} - -// Appends the given substring to the output, escaping "some" characters that -// it feels may not be safe. It assumes the input values are all contained in -// 8-bit although it allows any type. -// -// This is used in error cases to append invalid output so that it looks -// approximately correct. Non-error cases should not call this function since -// the escaping rules are not guaranteed! -void AppendInvalidNarrowString(const char* spec, int begin, int end, - CanonOutput* output); -void AppendInvalidNarrowString(const char16* spec, int begin, int end, - CanonOutput* output); - -// Misc canonicalization helpers ---------------------------------------------- - -// Converts between UTF-8 and UTF-16, returning true on successful conversion. -// The output will be appended to the given canonicalizer output (so make sure -// it's empty if you want to replace). -// -// On invalid input, this will still write as much output as possible, -// replacing the invalid characters with the "invalid character". It will -// return false in the failure case, and the caller should not continue as -// normal. -GURL_API bool ConvertUTF16ToUTF8(const char16* input, int input_len, - CanonOutput* output); -GURL_API bool ConvertUTF8ToUTF16(const char* input, int input_len, - CanonOutputT* output); - -// Converts from UTF-16 to 8-bit using the character set converter. If the -// converter is NULL, this will use UTF-8. -void ConvertUTF16ToQueryEncoding(const char16* input, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output); - -// Applies the replacements to the given component source. The component source -// should be pre-initialized to the "old" base. That is, all pointers will -// point to the spec of the old URL, and all of the Parsed components will -// be indices into that string. -// -// The pointers and components in the |source| for all non-NULL strings in the -// |repl| (replacements) will be updated to reference those strings. -// Canonicalizing with the new |source| and |parsed| can then combine URL -// components from many different strings. -void SetupOverrideComponents(const char* base, - const Replacements& repl, - URLComponentSource* source, - url_parse::Parsed* parsed); - -// Like the above 8-bit version, except that it additionally converts the -// UTF-16 input to UTF-8 before doing the overrides. -// -// The given utf8_buffer is used to store the converted components. They will -// be appended one after another, with the parsed structure identifying the -// appropriate substrings. This buffer is a parameter because the source has -// no storage, so the buffer must have the same lifetime as the source -// parameter owned by the caller. -// -// THE CALLER MUST NOT ADD TO THE |utf8_buffer| AFTER THIS CALL. Members of -// |source| will point into this buffer, which could be invalidated if -// additional data is added and the CanonOutput resizes its buffer. -// -// Returns true on success. Fales means that the input was not valid UTF-16, -// although we will have still done the override with "invalid characters" in -// place of errors. -bool SetupUTF16OverrideComponents(const char* base, - const Replacements& repl, - CanonOutput* utf8_buffer, - URLComponentSource* source, - url_parse::Parsed* parsed); - -// Implemented in url_canon_path.cc, these are required by the relative URL -// resolver as well, so we declare them here. -bool CanonicalizePartialPath(const char* spec, - const url_parse::Component& path, - int path_begin_in_output, - CanonOutput* output); -bool CanonicalizePartialPath(const char16* spec, - const url_parse::Component& path, - int path_begin_in_output, - CanonOutput* output); - -#ifndef WIN32 - -// Implementations of Windows' int-to-string conversions -GURL_API int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); -GURL_API int _itow_s(int value, char16* buffer, size_t size_in_chars, - int radix); - -// Secure template overloads for these functions -template -inline int _itoa_s(int value, char (&buffer)[N], int radix) { - return _itoa_s(value, buffer, N, radix); -} - -template -inline int _itow_s(int value, char16 (&buffer)[N], int radix) { - return _itow_s(value, buffer, N, radix); -} - -// _strtoui64 and strtoull behave the same -inline unsigned long long _strtoui64(const char* nptr, - char** endptr, int base) { - return strtoull(nptr, endptr, base); -} - -#endif // WIN32 - -} // namespace url_canon - -#endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal_file.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal_file.h.svn-base deleted file mode 100644 index 63a9c5b85..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_internal_file.h.svn-base +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// As with url_canon_internal.h, this file is intended to be included in -// another C++ file where the template types are defined. This allows the -// programmer to use this to use these functions for their own strings -// types, without bloating the code by having inline templates used in -// every call site. -// -// *** This file must be included after url_canon_internal as we depend on some -// functions in it. *** - -#ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__ -#define GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__ - -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_parse_internal.h" - -using namespace url_canon; - -// Given a pointer into the spec, this copies and canonicalizes the drive -// letter and colon to the output, if one is found. If there is not a drive -// spec, it won't do anything. The index of the next character in the input -// spec is returned (after the colon when a drive spec is found, the begin -// offset if one is not). -template -static int FileDoDriveSpec(const CHAR* spec, int begin, int end, - CanonOutput* output) { - // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, - // (with backslashes instead of slashes as well). - int num_slashes = CountConsecutiveSlashes(spec, begin, end); - int after_slashes = begin + num_slashes; - - if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end)) - return begin; // Haven't consumed any characters - - // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid - // and that it is followed by a colon/pipe. - - // Normalize Windows drive letters to uppercase - if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z') - output->push_back(spec[after_slashes] - 'a' + 'A'); - else - output->push_back(static_cast(spec[after_slashes])); - - // Normalize the character following it to a colon rather than pipe. - output->push_back(':'); - output->push_back('/'); - return after_slashes + 2; -} - -// FileDoDriveSpec will have already added the first backslash, so we need to -// write everything following the slashes using the path canonicalizer. -template -static void FileDoPath(const CHAR* spec, int begin, int end, - CanonOutput* output) { - // Normalize the number of slashes after the drive letter. The path - // canonicalizer expects the input to begin in a slash already so - // doesn't check. We want to handle no-slashes - int num_slashes = CountConsecutiveSlashes(spec, begin, end); - int after_slashes = begin + num_slashes; - - // Now use the regular path canonicalizer to canonicalize the rest of the - // path. We supply it with the path following the slashes. It won't prepend - // a slash because it assumes any nonempty path already starts with one. - // We explicitly filter out calls with no path here to prevent that case. - ParsedURL::Component sub_path(after_slashes, end - after_slashes); - if (sub_path.len > 0) { - // Give it a fake output component to write into. DoCanonicalizeFile will - // compute the full path component. - ParsedURL::Component fake_output_path; - URLCanonInternal::DoPath( - spec, sub_path, output, &fake_output_path); - } -} - -template -static bool DoCanonicalizeFileURL(const URLComponentSource& source, - const ParsedURL& parsed, - CanonOutput* output, - ParsedURL* new_parsed) { - // Things we don't set in file: URLs. - new_parsed->username = ParsedURL::Component(0, -1); - new_parsed->password = ParsedURL::Component(0, -1); - new_parsed->port = ParsedURL::Component(0, -1); - - // Scheme (known, so we don't bother running it through the more - // complicated scheme canonicalizer). - new_parsed->scheme.begin = output->length(); - output->push_back('f'); - output->push_back('i'); - output->push_back('l'); - output->push_back('e'); - new_parsed->scheme.len = output->length() - new_parsed->scheme.begin; - output->push_back(':'); - - // Write the separator for the host. - output->push_back('/'); - output->push_back('/'); - - // Append the host. For many file URLs, this will be empty. For UNC, this - // will be present. - // TODO(brettw) This doesn't do any checking for host name validity. We - // should probably handle validity checking of UNC hosts differently than - // for regular IP hosts. - bool success = URLCanonInternal::DoHost( - source.host, parsed.host, output, &new_parsed->host); - - // Write a separator for the start of the path. We'll ignore any slashes - // already at the beginning of the path. - new_parsed->path.begin = output->length(); - output->push_back('/'); - - // Copies and normalizes the "c:" at the beginning, if present. - int after_drive = FileDoDriveSpec(source.path, parsed.path.begin, - parsed.path.end(), output); - - // Copies the rest of the path - FileDoPath(source.path, after_drive, parsed.path.end(), output); - new_parsed->path.len = output->length() - new_parsed->path.begin; - - // Things following the path we can use the standard canonicalizers for. - success &= URLCanonInternal::DoQuery( - source.query, parsed.query, output, &new_parsed->query); - success &= URLCanonInternal::DoRef( - source.ref, parsed.ref, output, &new_parsed->ref); - - return success; -} - -#endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.cc.svn-base deleted file mode 100644 index 1421e79f4..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.cc.svn-base +++ /dev/null @@ -1,730 +0,0 @@ -// Copyright 2009, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "googleurl/src/url_canon_ip.h" - -#include - -#include "base/basictypes.h" -#include "base/logging.h" -#include "googleurl/src/url_canon_internal.h" - -namespace url_canon { - -namespace { - -// Converts one of the character types that represent a numerical base to the -// corresponding base. -int BaseForType(SharedCharTypes type) { - switch (type) { - case CHAR_HEX: - return 16; - case CHAR_DEC: - return 10; - case CHAR_OCT: - return 8; - default: - return 0; - } -} - -template -bool DoFindIPv4Components(const CHAR* spec, - const url_parse::Component& host, - url_parse::Component components[4]) { - if (!host.is_nonempty()) - return false; - - int cur_component = 0; // Index of the component we're working on. - int cur_component_begin = host.begin; // Start of the current component. - int end = host.end(); - for (int i = host.begin; /* nothing */; i++) { - if (i >= end || spec[i] == '.') { - // Found the end of the current component. - int component_len = i - cur_component_begin; - components[cur_component] = - url_parse::Component(cur_component_begin, component_len); - - // The next component starts after the dot. - cur_component_begin = i + 1; - cur_component++; - - // Don't allow empty components (two dots in a row), except we may - // allow an empty component at the end (this would indicate that the - // input ends in a dot). We also want to error if the component is - // empty and it's the only component (cur_component == 1). - if (component_len == 0 && (i < end || cur_component == 1)) - return false; - - if (i >= end) - break; // End of the input. - - if (cur_component == 4) { - // Anything else after the 4th component is an error unless it is a - // dot that would otherwise be treated as the end of input. - if (spec[i] == '.' && i + 1 == end) - break; - return false; - } - } else if (static_cast(spec[i]) >= 0x80 || - !IsIPv4Char(static_cast(spec[i]))) { - // Invalid character for an IPv4 address. - return false; - } - } - - // Fill in any unused components. - while (cur_component < 4) - components[cur_component++] = url_parse::Component(); - return true; -} - -// Converts an IPv4 component to a 32-bit number, while checking for overflow. -// -// Possible return values: -// - IPV4 - The number was valid, and did not overflow. -// - BROKEN - The input was numeric, but too large for a 32-bit field. -// - NEUTRAL - Input was not numeric. -// -// The input is assumed to be ASCII. FindIPv4Components should have stripped -// out any input that is greater than 7 bits. The components are assumed -// to be non-empty. -template -CanonHostInfo::Family IPv4ComponentToNumber( - const CHAR* spec, - const url_parse::Component& component, - uint32* number) { - // Figure out the base - SharedCharTypes base; - int base_prefix_len = 0; // Size of the prefix for this base. - if (spec[component.begin] == '0') { - // Either hex or dec, or a standalone zero. - if (component.len == 1) { - base = CHAR_DEC; - } else if (spec[component.begin + 1] == 'X' || - spec[component.begin + 1] == 'x') { - base = CHAR_HEX; - base_prefix_len = 2; - } else { - base = CHAR_OCT; - base_prefix_len = 1; - } - } else { - base = CHAR_DEC; - } - - // Extend the prefix to consume all leading zeros. - while (base_prefix_len < component.len && - spec[component.begin + base_prefix_len] == '0') - base_prefix_len++; - - // Put the component, minus any base prefix, into a NULL-terminated buffer so - // we can call the standard library. Because leading zeros have already been - // discarded, filling the entire buffer is guaranteed to trigger the 32-bit - // overflow check. - const int kMaxComponentLen = 16; - char buf[kMaxComponentLen + 1]; // digits + '\0' - int dest_i = 0; - for (int i = component.begin + base_prefix_len; i < component.end(); i++) { - // We know the input is 7-bit, so convert to narrow (if this is the wide - // version of the template) by casting. - char input = static_cast(spec[i]); - - // Validate that this character is OK for the given base. - if (!IsCharOfType(input, base)) - return CanonHostInfo::NEUTRAL; - - // Fill the buffer, if there's space remaining. This check allows us to - // verify that all characters are numeric, even those that don't fit. - if (dest_i < kMaxComponentLen) - buf[dest_i++] = input; - } - - buf[dest_i] = '\0'; - - // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal - // number can overflow a 64-bit number in <= 16 characters). - uint64 num = _strtoui64(buf, NULL, BaseForType(base)); - - // Check for 32-bit overflow. - if (num > kuint32max) - return CanonHostInfo::BROKEN; - - // No overflow. Success! - *number = static_cast(num); - return CanonHostInfo::IPV4; -} - -// See declaration of IPv4AddressToNumber for documentation. -template -CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec, - const url_parse::Component& host, - unsigned char address[4], - int* num_ipv4_components) { - // The identified components. Not all may exist. - url_parse::Component components[4]; - if (!FindIPv4Components(spec, host, components)) - return CanonHostInfo::NEUTRAL; - - // Convert existing components to digits. Values up to - // |existing_components| will be valid. - uint32 component_values[4]; - int existing_components = 0; - - // Set to true if one or more components are BROKEN. BROKEN is only - // returned if all components are IPV4 or BROKEN, so, for example, - // 12345678912345.de returns NEUTRAL rather than broken. - bool broken = false; - for (int i = 0; i < 4; i++) { - if (components[i].len <= 0) - continue; - CanonHostInfo::Family family = IPv4ComponentToNumber( - spec, components[i], &component_values[existing_components]); - - if (family == CanonHostInfo::BROKEN) { - broken = true; - } else if (family != CanonHostInfo::IPV4) { - // Stop if we hit a non-BROKEN invalid non-empty component. - return family; - } - - existing_components++; - } - - if (broken) - return CanonHostInfo::BROKEN; - - // Use that sequence of numbers to fill out the 4-component IP address. - - // First, process all components but the last, while making sure each fits - // within an 8-bit field. - for (int i = 0; i < existing_components - 1; i++) { - if (component_values[i] > kuint8max) - return CanonHostInfo::BROKEN; - address[i] = static_cast(component_values[i]); - } - - // Next, consume the last component to fill in the remaining bytes. - uint32 last_value = component_values[existing_components - 1]; - for (int i = 3; i >= existing_components - 1; i--) { - address[i] = static_cast(last_value); - last_value >>= 8; - } - - // If the last component has residual bits, report overflow. - if (last_value != 0) - return CanonHostInfo::BROKEN; - - // Tell the caller how many components we saw. - *num_ipv4_components = existing_components; - - // Success! - return CanonHostInfo::IPV4; -} - -// Return true if we've made a final IPV4/BROKEN decision, false if the result -// is NEUTRAL, and we could use a second opinion. -template -bool DoCanonicalizeIPv4Address(const CHAR* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info) { - host_info->family = IPv4AddressToNumber( - spec, host, host_info->address, &host_info->num_ipv4_components); - - switch (host_info->family) { - case CanonHostInfo::IPV4: - // Definitely an IPv4 address. - host_info->out_host.begin = output->length(); - AppendIPv4Address(host_info->address, output); - host_info->out_host.len = output->length() - host_info->out_host.begin; - return true; - case CanonHostInfo::BROKEN: - // Definitely broken. - return true; - default: - // Could be IPv6 or a hostname. - return false; - } -} - -// Helper class that describes the main components of an IPv6 input string. -// See the following examples to understand how it breaks up an input string: -// -// [Example 1]: input = "[::aa:bb]" -// ==> num_hex_components = 2 -// ==> hex_components[0] = Component(3,2) "aa" -// ==> hex_components[1] = Component(6,2) "bb" -// ==> index_of_contraction = 0 -// ==> ipv4_component = Component(0, -1) -// -// [Example 2]: input = "[1:2::3:4:5]" -// ==> num_hex_components = 5 -// ==> hex_components[0] = Component(1,1) "1" -// ==> hex_components[1] = Component(3,1) "2" -// ==> hex_components[2] = Component(6,1) "3" -// ==> hex_components[3] = Component(8,1) "4" -// ==> hex_components[4] = Component(10,1) "5" -// ==> index_of_contraction = 2 -// ==> ipv4_component = Component(0, -1) -// -// [Example 3]: input = "[::ffff:192.168.0.1]" -// ==> num_hex_components = 1 -// ==> hex_components[0] = Component(3,4) "ffff" -// ==> index_of_contraction = 0 -// ==> ipv4_component = Component(8, 11) "192.168.0.1" -// -// [Example 4]: input = "[1::]" -// ==> num_hex_components = 1 -// ==> hex_components[0] = Component(1,1) "1" -// ==> index_of_contraction = 1 -// ==> ipv4_component = Component(0, -1) -// -// [Example 5]: input = "[::192.168.0.1]" -// ==> num_hex_components = 0 -// ==> index_of_contraction = 0 -// ==> ipv4_component = Component(8, 11) "192.168.0.1" -// -struct IPv6Parsed { - // Zero-out the parse information. - void reset() { - num_hex_components = 0; - index_of_contraction = -1; - ipv4_component.reset(); - } - - // There can be up to 8 hex components (colon separated) in the literal. - url_parse::Component hex_components[8]; - - // The count of hex components present. Ranges from [0,8]. - int num_hex_components; - - // The index of the hex component that the "::" contraction precedes, or - // -1 if there is no contraction. - int index_of_contraction; - - // The range of characters which are an IPv4 literal. - url_parse::Component ipv4_component; -}; - -// Parse the IPv6 input string. If parsing succeeded returns true and fills -// |parsed| with the information. If parsing failed (because the input is -// invalid) returns false. -template -bool DoParseIPv6(const CHAR* spec, - const url_parse::Component& host, - IPv6Parsed* parsed) { - // Zero-out the info. - parsed->reset(); - - if (!host.is_nonempty()) - return false; - - // The index for start and end of address range (no brackets). - int begin = host.begin; - int end = host.end(); - - int cur_component_begin = begin; // Start of the current component. - - // Scan through the input, searching for hex components, "::" contractions, - // and IPv4 components. - for (int i = begin; /* i <= end */; i++) { - bool is_colon = spec[i] == ':'; - bool is_contraction = is_colon && i < end - 1 && spec[i + 1] == ':'; - - // We reached the end of the current component if we encounter a colon - // (separator between hex components, or start of a contraction), or end of - // input. - if (is_colon || i == end) { - int component_len = i - cur_component_begin; - - // A component should not have more than 4 hex digits. - if (component_len > 4) - return false; - - // Don't allow empty components. - if (component_len == 0) { - // The exception is when contractions appear at beginning of the - // input or at the end of the input. - if (!((is_contraction && i == begin) || (i == end && - parsed->index_of_contraction == parsed->num_hex_components))) - return false; - } - - // Add the hex component we just found to running list. - if (component_len > 0) { - // Can't have more than 8 components! - if (parsed->num_hex_components >= 8) - return false; - - parsed->hex_components[parsed->num_hex_components++] = - url_parse::Component(cur_component_begin, component_len); - } - } - - if (i == end) - break; // Reached the end of the input, DONE. - - // We found a "::" contraction. - if (is_contraction) { - // There can be at most one contraction in the literal. - if (parsed->index_of_contraction != -1) - return false; - parsed->index_of_contraction = parsed->num_hex_components; - ++i; // Consume the colon we peeked. - } - - if (is_colon) { - // Colons are separators between components, keep track of where the - // current component started (after this colon). - cur_component_begin = i + 1; - } else { - if (static_cast(spec[i]) >= 0x80) - return false; // Not ASCII. - - if (!IsHexChar(static_cast(spec[i]))) { - // Regular components are hex numbers. It is also possible for - // a component to be an IPv4 address in dotted form. - if (IsIPv4Char(static_cast(spec[i]))) { - // Since IPv4 address can only appear at the end, assume the rest - // of the string is an IPv4 address. (We will parse this separately - // later). - parsed->ipv4_component = url_parse::Component( - cur_component_begin, end - cur_component_begin); - break; - } else { - // The character was neither a hex digit, nor an IPv4 character. - return false; - } - } - } - } - - return true; -} - -// Verifies the parsed IPv6 information, checking that the various components -// add up to the right number of bits (hex components are 16 bits, while -// embedded IPv4 formats are 32 bits, and contractions are placeholdes for -// 16 or more bits). Returns true if sizes match up, false otherwise. On -// success writes the length of the contraction (if any) to -// |out_num_bytes_of_contraction|. -bool CheckIPv6ComponentsSize(const IPv6Parsed& parsed, - int* out_num_bytes_of_contraction) { - // Each group of four hex digits contributes 16 bits. - int num_bytes_without_contraction = parsed.num_hex_components * 2; - - // If an IPv4 address was embedded at the end, it contributes 32 bits. - if (parsed.ipv4_component.is_valid()) - num_bytes_without_contraction += 4; - - // If there was a "::" contraction, its size is going to be: - // MAX([16bits], [128bits] - num_bytes_without_contraction). - int num_bytes_of_contraction = 0; - if (parsed.index_of_contraction != -1) { - num_bytes_of_contraction = 16 - num_bytes_without_contraction; - if (num_bytes_of_contraction < 2) - num_bytes_of_contraction = 2; - } - - // Check that the numbers add up. - if (num_bytes_without_contraction + num_bytes_of_contraction != 16) - return false; - - *out_num_bytes_of_contraction = num_bytes_of_contraction; - return true; -} - -// Converts a hex comonent into a number. This cannot fail since the caller has -// already verified that each character in the string was a hex digit, and -// that there were no more than 4 characters. -template -uint16 IPv6HexComponentToNumber(const CHAR* spec, - const url_parse::Component& component) { - DCHECK(component.len <= 4); - - // Copy the hex string into a C-string. - char buf[5]; - for (int i = 0; i < component.len; ++i) - buf[i] = static_cast(spec[component.begin + i]); - buf[component.len] = '\0'; - - // Convert it to a number (overflow is not possible, since with 4 hex - // characters we can at most have a 16 bit number). - return static_cast(_strtoui64(buf, NULL, 16)); -} - -// Converts an IPv6 address to a 128-bit number (network byte order), returning -// true on success. False means that the input was not a valid IPv6 address. -template -bool DoIPv6AddressToNumber(const CHAR* spec, - const url_parse::Component& host, - unsigned char address[16]) { - // Make sure the component is bounded by '[' and ']'. - int end = host.end(); - if (!host.is_nonempty() || spec[host.begin] != '[' || spec[end - 1] != ']') - return false; - - // Exclude the square brackets. - url_parse::Component ipv6_comp(host.begin + 1, host.len - 2); - - // Parse the IPv6 address -- identify where all the colon separated hex - // components are, the "::" contraction, and the embedded IPv4 address. - IPv6Parsed ipv6_parsed; - if (!DoParseIPv6(spec, ipv6_comp, &ipv6_parsed)) - return false; - - // Do some basic size checks to make sure that the address doesn't - // specify more than 128 bits or fewer than 128 bits. This also resolves - // how may zero bytes the "::" contraction represents. - int num_bytes_of_contraction; - if (!CheckIPv6ComponentsSize(ipv6_parsed, &num_bytes_of_contraction)) - return false; - - int cur_index_in_address = 0; - - // Loop through each hex components, and contraction in order. - for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) { - // Append the contraction if it appears before this component. - if (i == ipv6_parsed.index_of_contraction) { - for (int j = 0; j < num_bytes_of_contraction; ++j) - address[cur_index_in_address++] = 0; - } - // Append the hex component's value. - if (i != ipv6_parsed.num_hex_components) { - // Get the 16-bit value for this hex component. - uint16 number = IPv6HexComponentToNumber( - spec, ipv6_parsed.hex_components[i]); - // Append to |address|, in network byte order. - address[cur_index_in_address++] = (number & 0xFF00) >> 8; - address[cur_index_in_address++] = (number & 0x00FF); - } - } - - // If there was an IPv4 section, convert it into a 32-bit number and append - // it to |address|. - if (ipv6_parsed.ipv4_component.is_valid()) { - // Append the 32-bit number to |address|. - int ignored_num_ipv4_components; - if (CanonHostInfo::IPV4 != - IPv4AddressToNumber(spec, - ipv6_parsed.ipv4_component, - &address[cur_index_in_address], - &ignored_num_ipv4_components)) - return false; - } - - return true; -} - -// Searches for the longest sequence of zeros in |address|, and writes the -// range into |contraction_range|. The run of zeros must be at least 16 bits, -// and if there is a tie the first is chosen. -void ChooseIPv6ContractionRange(const unsigned char address[16], - url_parse::Component* contraction_range) { - // The longest run of zeros in |address| seen so far. - url_parse::Component max_range; - - // The current run of zeros in |address| being iterated over. - url_parse::Component cur_range; - - for (int i = 0; i < 16; i += 2) { - // Test for 16 bits worth of zero. - bool is_zero = (address[i] == 0 && address[i + 1] == 0); - - if (is_zero) { - // Add the zero to the current range (or start a new one). - if (!cur_range.is_valid()) - cur_range = url_parse::Component(i, 0); - cur_range.len += 2; - } - - if (!is_zero || i == 14) { - // Just completed a run of zeros. If the run is greater than 16 bits, - // it is a candidate for the contraction. - if (cur_range.len > 2 && cur_range.len > max_range.len) { - max_range = cur_range; - } - cur_range.reset(); - } - } - *contraction_range = max_range; -} - -// Return true if we've made a final IPV6/BROKEN decision, false if the result -// is NEUTRAL, and we could use a second opinion. -template -bool DoCanonicalizeIPv6Address(const CHAR* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info) { - // Turn the IP address into a 128 bit number. - if (!IPv6AddressToNumber(spec, host, host_info->address)) { - // If it's not an IPv6 address, scan for characters that should *only* - // exist in an IPv6 address. - for (int i = host.begin; i < host.end(); i++) { - switch (spec[i]) { - case '[': - case ']': - case ':': - host_info->family = CanonHostInfo::BROKEN; - return true; - } - } - - // No invalid characters. Could still be IPv4 or a hostname. - host_info->family = CanonHostInfo::NEUTRAL; - return false; - } - - host_info->out_host.begin = output->length(); - output->push_back('['); - AppendIPv6Address(host_info->address, output); - output->push_back(']'); - host_info->out_host.len = output->length() - host_info->out_host.begin; - - host_info->family = CanonHostInfo::IPV6; - return true; -} - -} // namespace - -void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) { - for (int i = 0; i < 4; i++) { - char str[16]; - _itoa_s(address[i], str, 10); - - for (int ch = 0; str[ch] != 0; ch++) - output->push_back(str[ch]); - - if (i != 3) - output->push_back('.'); - } -} - -void AppendIPv6Address(const unsigned char address[16], CanonOutput* output) { - // We will output the address according to the rules in: - // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4 - - // Start by finding where to place the "::" contraction (if any). - url_parse::Component contraction_range; - ChooseIPv6ContractionRange(address, &contraction_range); - - for (int i = 0; i <= 14;) { - // We check 2 bytes at a time, from bytes (0, 1) to (14, 15), inclusive. - DCHECK(i % 2 == 0); - if (i == contraction_range.begin && contraction_range.len > 0) { - // Jump over the contraction. - if (i == 0) - output->push_back(':'); - output->push_back(':'); - i = contraction_range.end(); - } else { - // Consume the next 16 bits from |address|. - int x = address[i] << 8 | address[i + 1]; - - i += 2; - - // Stringify the 16 bit number (at most requires 4 hex digits). - char str[5]; - _itoa_s(x, str, 16); - for (int ch = 0; str[ch] != 0; ++ch) - output->push_back(str[ch]); - - // Put a colon after each number, except the last. - if (i < 16) - output->push_back(':'); - } - } -} - -bool FindIPv4Components(const char* spec, - const url_parse::Component& host, - url_parse::Component components[4]) { - return DoFindIPv4Components(spec, host, components); -} - -bool FindIPv4Components(const char16* spec, - const url_parse::Component& host, - url_parse::Component components[4]) { - return DoFindIPv4Components(spec, host, components); -} - -void CanonicalizeIPAddress(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info) { - if (DoCanonicalizeIPv4Address( - spec, host, output, host_info)) - return; - if (DoCanonicalizeIPv6Address( - spec, host, output, host_info)) - return; -} - -void CanonicalizeIPAddress(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info) { - if (DoCanonicalizeIPv4Address( - spec, host, output, host_info)) - return; - if (DoCanonicalizeIPv6Address( - spec, host, output, host_info)) - return; -} - -CanonHostInfo::Family IPv4AddressToNumber(const char* spec, - const url_parse::Component& host, - unsigned char address[4], - int* num_ipv4_components) { - return DoIPv4AddressToNumber(spec, host, address, num_ipv4_components); -} - -CanonHostInfo::Family IPv4AddressToNumber(const char16* spec, - const url_parse::Component& host, - unsigned char address[4], - int* num_ipv4_components) { - return DoIPv4AddressToNumber( - spec, host, address, num_ipv4_components); -} - -bool IPv6AddressToNumber(const char* spec, - const url_parse::Component& host, - unsigned char address[16]) { - return DoIPv6AddressToNumber(spec, host, address); -} - -bool IPv6AddressToNumber(const char16* spec, - const url_parse::Component& host, - unsigned char address[16]) { - return DoIPv6AddressToNumber(spec, host, address); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.h.svn-base deleted file mode 100644 index a2900c6a8..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_ip.h.svn-base +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLEURL_SRC_URL_CANON_IP_H__ -#define GOOGLEURL_SRC_URL_CANON_IP_H__ - -#include "base/string16.h" -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_common.h" -#include "googleurl/src/url_parse.h" - -namespace url_canon { - -// Writes the given IPv4 address to |output|. -GURL_API void AppendIPv4Address(const unsigned char address[4], - CanonOutput* output); - -// Writes the given IPv6 address to |output|. -GURL_API void AppendIPv6Address(const unsigned char address[16], - CanonOutput* output); - -// Searches the host name for the portions of the IPv4 address. On success, -// each component will be placed into |components| and it will return true. -// It will return false if the host can not be separated as an IPv4 address -// or if there are any non-7-bit characters or other characters that can not -// be in an IP address. (This is important so we fail as early as possible for -// common non-IP hostnames.) -// -// Not all components may exist. If there are only 3 components, for example, -// the last one will have a length of -1 or 0 to indicate it does not exist. -// -// Note that many platform's inet_addr will ignore everything after a space -// in certain curcumstances if the stuff before the space looks like an IP -// address. IE6 is included in this. We do NOT handle this case. In many cases, -// the browser's canonicalization will get run before this which converts -// spaces to %20 (in the case of IE7) or rejects them (in the case of -// Mozilla), so this code path never gets hit. Our host canonicalization will -// notice these spaces and escape them, which will make IP address finding -// fail. This seems like better behavior than stripping after a space. -GURL_API bool FindIPv4Components(const char* spec, - const url_parse::Component& host, - url_parse::Component components[4]); -GURL_API bool FindIPv4Components(const char16* spec, - const url_parse::Component& host, - url_parse::Component components[4]); - -// Converts an IPv4 address to a 32-bit number (network byte order). -// -// Possible return values: -// IPV4 - IPv4 address was successfully parsed. -// BROKEN - Input was formatted like an IPv4 address, but overflow occurred -// during parsing. -// NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address. -// It might be an IPv6 address, or a hostname. -// -// On success, |num_ipv4_components| will be populated with the number of -// components in the IPv4 address. -GURL_API CanonHostInfo::Family IPv4AddressToNumber( - const char* spec, - const url_parse::Component& host, - unsigned char address[4], - int* num_ipv4_components); -GURL_API CanonHostInfo::Family IPv4AddressToNumber( - const char16* spec, - const url_parse::Component& host, - unsigned char address[4], - int* num_ipv4_components); - -// Converts an IPv6 address to a 128-bit number (network byte order), returning -// true on success. False means that the input was not a valid IPv6 address. -// -// NOTE that |host| is expected to be surrounded by square brackets. -// i.e. "[::1]" rather than "::1". -GURL_API bool IPv6AddressToNumber(const char* spec, - const url_parse::Component& host, - unsigned char address[16]); -GURL_API bool IPv6AddressToNumber(const char16* spec, - const url_parse::Component& host, - unsigned char address[16]); - -} // namespace url_canon - -#endif // GOOGLEURL_SRC_URL_CANON_IP_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_mailtourl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_mailtourl.cc.svn-base deleted file mode 100644 index 97868b8ab..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_mailtourl.cc.svn-base +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Functions for canonicalizing "mailto:" URLs. - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_parse_internal.h" - -namespace url_canon { - -namespace { - - -template -bool DoCanonicalizeMailtoURL(const URLComponentSource& source, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - - // mailto: only uses {scheme, path, query} -- clear the rest. - new_parsed->username = url_parse::Component(); - new_parsed->password = url_parse::Component(); - new_parsed->host = url_parse::Component(); - new_parsed->port = url_parse::Component(); - new_parsed->ref = url_parse::Component(); - - // Scheme (known, so we don't bother running it through the more - // complicated scheme canonicalizer). - new_parsed->scheme.begin = output->length(); - output->Append("mailto:", 7); - new_parsed->scheme.len = 6; - - bool success = true; - - // Path - if (parsed.path.is_valid()) { - new_parsed->path.begin = output->length(); - - // Copy the path using path URL's more lax escaping rules. - // We convert to UTF-8 and escape non-ASCII, but leave all - // ASCII characters alone. - int end = parsed.path.end(); - for (int i = parsed.path.begin; i < end; ++i) { - UCHAR uch = static_cast(source.path[i]); - if (uch < 0x20 || uch >= 0x80) - success &= AppendUTF8EscapedChar(source.path, &i, end, output); - else - output->push_back(static_cast(uch)); - } - - new_parsed->path.len = output->length() - new_parsed->path.begin; - } else { - // No path at all - new_parsed->path.reset(); - } - - // Query -- always use the default utf8 charset converter. - CanonicalizeQuery(source.query, parsed.query, NULL, - output, &new_parsed->query); - - return success; -} - -} // namespace - -bool CanonicalizeMailtoURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeMailtoURL( - URLComponentSource(spec), parsed, output, new_parsed); -} - -bool CanonicalizeMailtoURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeMailtoURL( - URLComponentSource(spec), parsed, output, new_parsed); -} - -bool ReplaceMailtoURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupOverrideComponents(base, replacements, &source, &parsed); - return DoCanonicalizeMailtoURL( - source, parsed, output, new_parsed); -} - -bool ReplaceMailtoURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - RawCanonOutput<1024> utf8; - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); - return DoCanonicalizeMailtoURL( - source, parsed, output, new_parsed); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_path.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_path.cc.svn-base deleted file mode 100644 index d86643a94..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_path.cc.svn-base +++ /dev/null @@ -1,378 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Canonicalization functions for the paths of URLs. - -#include "base/logging.h" -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_parse_internal.h" - -namespace url_canon { - -namespace { - -enum CharacterFlags { - // Pass through unchanged, whether escaped or unescaped. This doesn't - // actually set anything so you can't OR it to check, it's just to make the - // table below more clear when neither ESCAPE or UNESCAPE is set. - PASS = 0, - - // This character requires special handling in DoPartialPath. Doing this test - // first allows us to filter out the common cases of regular characters that - // can be directly copied. - SPECIAL = 1, - - // This character must be escaped in the canonical output. Note that all - // escaped chars also have the "special" bit set so that the code that looks - // for this is triggered. Not valid with PASS or ESCAPE - ESCAPE_BIT = 2, - ESCAPE = ESCAPE_BIT | SPECIAL, - - // This character must be unescaped in canonical output. Not valid with - // ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these - // characters unescaped, they should just be copied. - UNESCAPE = 4, - - // This character is disallowed in URLs. Note that the "special" bit is also - // set to trigger handling. - INVALID_BIT = 8, - INVALID = INVALID_BIT | SPECIAL, -}; - -// This table contains one of the above flag values. Note some flags are more -// than one bits because they also turn on the "special" flag. Special is the -// only flag that may be combined with others. -// -// This table is designed to match exactly what IE does with the characters. -// -// Dot is even more special, and the escaped version is handled specially by -// IsDot. Therefore, we don't need the "escape" flag, and even the "unescape" -// bit is never handled (we just need the "special") bit. -const unsigned char kPathCharLookup[0x100] = { -// NULL control chars... - INVALID, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, -// control chars... - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, -// ' ' ! " # $ % & ' ( ) * + , - . / - ESCAPE, PASS, ESCAPE, ESCAPE, PASS, ESCAPE, PASS, PASS, PASS, PASS, PASS, PASS, PASS, UNESCAPE,SPECIAL, PASS, -// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? - UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS, PASS, ESCAPE, PASS, ESCAPE, ESCAPE, -// @ A B C D E F G H I J K L M N O - PASS, UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE, -// P Q R S T U V W X Y Z [ \ ] ^ _ - UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS, ESCAPE, PASS, ESCAPE, UNESCAPE, -// ` a b c d e f g h i j k l m n o - ESCAPE, UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE, -// p q r s t u v w x y z { | } ~ - UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,ESCAPE, ESCAPE, ESCAPE, UNESCAPE,ESCAPE, -// ...all the high-bit characters are escaped - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, - ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE}; - -enum DotDisposition { - // The given dot is just part of a filename and is not special. - NOT_A_DIRECTORY, - - // The given dot is the current directory. - DIRECTORY_CUR, - - // The given dot is the first of a double dot that should take us up one. - DIRECTORY_UP -}; - -// When the path resolver finds a dot, this function is called with the -// character following that dot to see what it is. The return value -// indicates what type this dot is (see above). This code handles the case -// where the dot is at the end of the input. -// -// |*consumed_len| will contain the number of characters in the input that -// express what we found. -// -// If the input is "../foo", |after_dot| = 1, |end| = 6, and -// at the end, |*consumed_len| = 2 for the "./" this function consumed. The -// original dot length should be handled by the caller. -template -DotDisposition ClassifyAfterDot(const CHAR* spec, int after_dot, - int end, int* consumed_len) { - if (after_dot == end) { - // Single dot at the end. - *consumed_len = 0; - return DIRECTORY_CUR; - } - if (url_parse::IsURLSlash(spec[after_dot])) { - // Single dot followed by a slash. - *consumed_len = 1; // Consume the slash - return DIRECTORY_CUR; - } - - int second_dot_len = IsDot(spec, after_dot, end); - if (second_dot_len) { - int after_second_dot = after_dot + second_dot_len; - if (after_second_dot == end) { - // Double dot at the end. - *consumed_len = second_dot_len; - return DIRECTORY_UP; - } - if (url_parse::IsURLSlash(spec[after_second_dot])) { - // Double dot followed by a slash. - *consumed_len = second_dot_len + 1; - return DIRECTORY_UP; - } - } - - // The dots are followed by something else, not a directory. - *consumed_len = 0; - return NOT_A_DIRECTORY; -} - -// Rewinds the output to the previous slash. It is assumed that the output -// ends with a slash and this doesn't count (we call this when we are -// appending directory paths, so the previous path component has and ending -// slash). -// -// This will stop at the first slash (assumed to be at position -// |path_begin_in_output| and not go any higher than that. Some web pages -// do ".." too many times, so we need to handle that brokenness. -// -// It searches for a literal slash rather than including a backslash as well -// because it is run only on the canonical output. -// -// The output is guaranteed to end in a slash when this function completes. -void BackUpToPreviousSlash(int path_begin_in_output, - CanonOutput* output) { - DCHECK(output->length() > 0); - - int i = output->length() - 1; - DCHECK(output->at(i) == '/'); - if (i == path_begin_in_output) - return; // We're at the first slash, nothing to do. - - // Now back up (skipping the trailing slash) until we find another slash. - i--; - while (output->at(i) != '/' && i > path_begin_in_output) - i--; - - // Now shrink the output to just include that last slash we found. - output->set_length(i + 1); -} - -// Appends the given path to the output. It assumes that if the input path -// starts with a slash, it should be copied to the output. If no path has -// already been appended to the output (the case when not resolving -// relative URLs), the path should begin with a slash. -// -// If there are already path components (this mode is used when appending -// relative paths for resolving), it assumes that the output already has -// a trailing slash and that if the input begins with a slash, it should be -// copied to the output. -// -// We do not collapse multiple slashes in a row to a single slash. It seems -// no web browsers do this, and we don't want incompababilities, even though -// it would be correct for most systems. -template -bool DoPartialPath(const CHAR* spec, - const url_parse::Component& path, - int path_begin_in_output, - CanonOutput* output) { - int end = path.end(); - - bool success = true; - for (int i = path.begin; i < end; i++) { - UCHAR uch = static_cast(spec[i]); - if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) { - // We only need to test wide input for having non-ASCII characters. For - // narrow input, we'll always just use the lookup table. We don't try to - // do anything tricky with decoding/validating UTF-8. This function will - // read one or two UTF-16 characters and append the output as UTF-8. This - // call will be removed in 8-bit mode. - success &= AppendUTF8EscapedChar(spec, &i, end, output); - } else { - // Normal ASCII character or 8-bit input, use the lookup table. - unsigned char out_ch = static_cast(uch); - unsigned char flags = kPathCharLookup[out_ch]; - if (flags & SPECIAL) { - // Needs special handling of some sort. - int dotlen; - if ((dotlen = IsDot(spec, i, end)) > 0) { - // See if this dot was preceeded by a slash in the output. We - // assume that when canonicalizing paths, they will always - // start with a slash and not a dot, so we don't have to - // bounds check the output. - // - // Note that we check this in the case of dots so we don't have to - // special case slashes. Since slashes are much more common than - // dots, this actually increases performance measurably (though - // slightly). - DCHECK(output->length() > path_begin_in_output); - if (output->length() > path_begin_in_output && - output->at(output->length() - 1) == '/') { - // Slash followed by a dot, check to see if this is means relative - int consumed_len; - switch (ClassifyAfterDot(spec, i + dotlen, end, - &consumed_len)) { - case NOT_A_DIRECTORY: - // Copy the dot to the output, it means nothing special. - output->push_back('.'); - i += dotlen - 1; - break; - case DIRECTORY_CUR: // Current directory, just skip the input. - i += dotlen + consumed_len - 1; - break; - case DIRECTORY_UP: - BackUpToPreviousSlash(path_begin_in_output, output); - i += dotlen + consumed_len - 1; - break; - } - } else { - // This dot is not preceeded by a slash, it is just part of some - // file name. - output->push_back('.'); - i += dotlen - 1; - } - - } else if (out_ch == '\\') { - // Convert backslashes to forward slashes - output->push_back('/'); - - } else if (out_ch == '%') { - // Handle escape sequences. - unsigned char unescaped_value; - if (DecodeEscaped(spec, &i, end, &unescaped_value)) { - // Valid escape sequence, see if we keep, reject, or unescape it. - char unescaped_flags = kPathCharLookup[unescaped_value]; - - if (unescaped_flags & UNESCAPE) { - // This escaped value shouldn't be escaped, copy it. - output->push_back(unescaped_value); - } else if (unescaped_flags & INVALID_BIT) { - // Invalid escaped character, copy it and remember the error. - output->push_back('%'); - output->push_back(static_cast(spec[i - 1])); - output->push_back(static_cast(spec[i])); - success = false; - } else { - // Valid escaped character but we should keep it escaped. We - // don't want to change the case of any hex letters in case - // the server is sensitive to that, so we just copy the two - // characters without checking (DecodeEscape will have advanced - // to the last character of the pair). - output->push_back('%'); - output->push_back(static_cast(spec[i - 1])); - output->push_back(static_cast(spec[i])); - } - } else { - // Invalid escape sequence. IE7 rejects any URLs with such - // sequences, while Firefox, IE6, and Safari all pass it through - // unchanged. We are more permissive unlike IE7. I don't think this - // can cause significant problems, if it does, we should change - // to be more like IE7. - output->push_back('%'); - } - - } else if (flags & INVALID_BIT) { - // For NULLs, etc. fail. - AppendEscapedChar(out_ch, output); - success = false; - - } else if (flags & ESCAPE_BIT) { - // This character should be escaped. - AppendEscapedChar(out_ch, output); - } - } else { - // Nothing special about this character, just append it. - output->push_back(out_ch); - } - } - } - return success; -} - -template -bool DoPath(const CHAR* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path) { - bool success = true; - out_path->begin = output->length(); - if (path.len > 0) { - // Write out an initial slash if the input has none. If we just parse a URL - // and then canonicalize it, it will of course have a slash already. This - // check is for the replacement and relative URL resolving cases of file - // URLs. - if (!url_parse::IsURLSlash(spec[path.begin])) - output->push_back('/'); - - success = DoPartialPath(spec, path, out_path->begin, output); - } else { - // No input, canonical path is a slash. - output->push_back('/'); - } - out_path->len = output->length() - out_path->begin; - return success; -} - -} // namespace - -bool CanonicalizePath(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path) { - return DoPath(spec, path, output, out_path); -} - -bool CanonicalizePath(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path) { - return DoPath(spec, path, output, out_path); -} - -bool CanonicalizePartialPath(const char* spec, - const url_parse::Component& path, - int path_begin_in_output, - CanonOutput* output) { - return DoPartialPath(spec, path, path_begin_in_output, - output); -} - -bool CanonicalizePartialPath(const char16* spec, - const url_parse::Component& path, - int path_begin_in_output, - CanonOutput* output) { - return DoPartialPath(spec, path, path_begin_in_output, - output); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_pathurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_pathurl.cc.svn-base deleted file mode 100644 index 4a990c7bc..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_pathurl.cc.svn-base +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Functions for canonicalizing "path" URLs. Not to be confused with the path -// of a URL, these are URLs that have no authority section, only a path. For -// example, "javascript:" and "data:". - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" - -namespace url_canon { - -namespace { - -template -bool DoCanonicalizePathURL(const URLComponentSource& source, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - // Scheme: this will append the colon. - bool success = CanonicalizeScheme(source.scheme, parsed.scheme, - output, &new_parsed->scheme); - - // We assume there's no authority for path URLs. Note that hosts should never - // have -1 length. - new_parsed->username.reset(); - new_parsed->password.reset(); - new_parsed->host.reset(); - new_parsed->port.reset(); - - if (parsed.path.is_valid()) { - // Copy the path using path URL's more lax escaping rules (think for - // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all - // ASCII characters alone. This helps readability of JavaStript. - new_parsed->path.begin = output->length(); - int end = parsed.path.end(); - for (int i = parsed.path.begin; i < end; i++) { - UCHAR uch = static_cast(source.path[i]); - if (uch < 0x20 || uch >= 0x80) - success &= AppendUTF8EscapedChar(source.path, &i, end, output); - else - output->push_back(static_cast(uch)); - } - new_parsed->path.len = output->length() - new_parsed->path.begin; - } else { - // Empty path. - new_parsed->path.reset(); - } - - // Assume there's no query or ref. - new_parsed->query.reset(); - new_parsed->ref.reset(); - - return success; -} - -} // namespace - -bool CanonicalizePathURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizePathURL( - URLComponentSource(spec), parsed, output, new_parsed); -} - -bool CanonicalizePathURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizePathURL( - URLComponentSource(spec), parsed, output, new_parsed); -} - -bool ReplacePathURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupOverrideComponents(base, replacements, &source, &parsed); - return DoCanonicalizePathURL( - source, parsed, output, new_parsed); -} - -bool ReplacePathURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - RawCanonOutput<1024> utf8; - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); - return DoCanonicalizePathURL( - source, parsed, output, new_parsed); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_query.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_query.cc.svn-base deleted file mode 100644 index cee8774c4..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_query.cc.svn-base +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" - -// Query canonicalization in IE -// ---------------------------- -// IE is very permissive for query parameters specified in links on the page -// (in contrast to links that it constructs itself based on form data). It does -// not unescape any character. It does not reject any escape sequence (be they -// invalid like "%2y" or freaky like %00). -// -// IE only escapes spaces and nothing else. Embedded NULLs, tabs (0x09), -// LF (0x0a), and CR (0x0d) are removed (this probably happens at an earlier -// layer since they are removed from all portions of the URL). All other -// characters are passed unmodified. Invalid UTF-16 sequences are preserved as -// well, with each character in the input being converted to UTF-8. It is the -// server's job to make sense of this invalid query. -// -// Invalid multibyte sequences (for example, invalid UTF-8 on a UTF-8 page) -// are converted to the invalid character and sent as unescaped UTF-8 (0xef, -// 0xbf, 0xbd). This may not be canonicalization, the parser may generate these -// strings before the URL handler ever sees them. -// -// Our query canonicalization -// -------------------------- -// We escape all non-ASCII characters and control characters, like Firefox. -// This is more conformant to the URL spec, and there do not seem to be many -// problems relating to Firefox's behavior. -// -// Like IE, we will never unescape (although the application may want to try -// unescaping to present the user with a more understandable URL). We will -// replace all invalid sequences (including invalid UTF-16 sequences, which IE -// doesn't) with the "invalid character," and we will escape it. - -namespace url_canon { - -namespace { - -// Returns true if the characters starting at |begin| and going until |end| -// (non-inclusive) are all representable in 7-bits. -template -bool IsAllASCII(const CHAR* spec, const url_parse::Component& query) { - int end = query.end(); - for (int i = query.begin; i < end; i++) { - if (static_cast(spec[i]) >= 0x80) - return false; - } - return true; -} - -// Appends the given string to the output, escaping characters that do not -// match the given |type| in SharedCharTypes. This version will accept 8 or 16 -// bit characters, but assumes that they have only 7-bit values. It also assumes -// that all UTF-8 values are correct, so doesn't bother checking -template -void AppendRaw8BitQueryString(const CHAR* source, int length, - CanonOutput* output) { - for (int i = 0; i < length; i++) { - if (!IsQueryChar(static_cast(source[i]))) - AppendEscapedChar(static_cast(source[i]), output); - else // Doesn't need escaping. - output->push_back(static_cast(source[i])); - } -} - -// Runs the converter on the given UTF-8 input. Since the converter expects -// UTF-16, we have to convert first. The converter must be non-NULL. -void RunConverter(const char* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output) { - // This function will replace any misencoded values with the invalid - // character. This is what we want so we don't have to check for error. - RawCanonOutputW<1024> utf16; - ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16); - converter->ConvertFromUTF16(utf16.data(), utf16.length(), output); -} - -// Runs the converter with the given UTF-16 input. We don't have to do -// anything, but this overriddden function allows us to use the same code -// for both UTF-8 and UTF-16 input. -void RunConverter(const char16* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output) { - converter->ConvertFromUTF16(&spec[query.begin], query.len, output); -} - -template -void DoConvertToQueryEncoding(const CHAR* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output) { - if (IsAllASCII(spec, query)) { - // Easy: the input can just appended with no character set conversions. - AppendRaw8BitQueryString(&spec[query.begin], query.len, output); - - } else { - // Harder: convert to the proper encoding first. - if (converter) { - // Run the converter to get an 8-bit string, then append it, escaping - // necessary values. - RawCanonOutput<1024> eight_bit; - RunConverter(spec, query, converter, &eight_bit); - AppendRaw8BitQueryString(eight_bit.data(), eight_bit.length(), output); - - } else { - // No converter, do our own UTF-8 conversion. - AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output); - } - } -} - -template -void DoCanonicalizeQuery(const CHAR* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query) { - if (query.len < 0) { - *out_query = url_parse::Component(); - return; - } - - output->push_back('?'); - out_query->begin = output->length(); - - DoConvertToQueryEncoding(spec, query, converter, output); - - out_query->len = output->length() - out_query->begin; -} - -} // namespace - -void CanonicalizeQuery(const char* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query) { - DoCanonicalizeQuery(spec, query, converter, - output, out_query); -} - -void CanonicalizeQuery(const char16* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query) { - DoCanonicalizeQuery(spec, query, converter, - output, out_query); -} - -void ConvertUTF16ToQueryEncoding(const char16* input, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output) { - DoConvertToQueryEncoding(input, query, - converter, output); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_relative.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_relative.cc.svn-base deleted file mode 100644 index 63630b451..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_relative.cc.svn-base +++ /dev/null @@ -1,579 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Canonicalizer functions for working with and resolving relative URLs. - -#include "base/logging.h" -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_parse_internal.h" -#include "googleurl/src/url_util_internal.h" - -namespace url_canon { - -namespace { - -// Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug -// 379034), whereas IE is case-insensetive. -// -// We choose to be more permissive like IE. We don't need to worry about -// unescaping or anything here: neither IE or Firefox allow this. We also -// don't have to worry about invalid scheme characters since we are comparing -// against the canonical scheme of the base. -// -// The base URL should always be canonical, therefore is ASCII. -template -bool AreSchemesEqual(const char* base, - const url_parse::Component& base_scheme, - const CHAR* cmp, - const url_parse::Component& cmp_scheme) { - if (base_scheme.len != cmp_scheme.len) - return false; - for (int i = 0; i < base_scheme.len; i++) { - // We assume the base is already canonical, so we don't have to - // canonicalize it. - if (CanonicalSchemeChar(cmp[cmp_scheme.begin + i]) != - base[base_scheme.begin + i]) - return false; - } - return true; -} - -#ifdef WIN32 - -// Here, we also allow Windows paths to be represented as "/C:/" so we can be -// consistent about URL paths beginning with slashes. This function is like -// DoesBeginWindowsDrivePath except that it also requires a slash at the -// beginning. -template -bool DoesBeginSlashWindowsDriveSpec(const CHAR* spec, int start_offset, - int spec_len) { - if (start_offset >= spec_len) - return false; - return url_parse::IsURLSlash(spec[start_offset]) && - url_parse::DoesBeginWindowsDriveSpec(spec, start_offset + 1, spec_len); -} - -#endif // WIN32 - -// See IsRelativeURL in the header file for usage. -template -bool DoIsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const CHAR* url, - int url_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component) { - *is_relative = false; // So we can default later to not relative. - - // Trim whitespace and construct a new range for the substring. - int begin = 0; - url_parse::TrimURL(url, &begin, &url_len); - if (begin >= url_len) { - // Empty URLs are relative, but do nothing. - *relative_component = url_parse::Component(begin, 0); - *is_relative = true; - return true; - } - -#ifdef WIN32 - // We special case paths like "C:\foo" so they can link directly to the - // file on Windows (IE compatability). The security domain stuff should - // prevent a link like this from actually being followed if its on a - // web page. - // - // We treat "C:/foo" as an absolute URL. We can go ahead and treat "/c:/" - // as relative, as this will just replace the path when the base scheme - // is a file and the answer will still be correct. - // - // We require strict backslashes when detecting UNC since two forward - // shashes should be treated a a relative URL with a hostname. - if (url_parse::DoesBeginWindowsDriveSpec(url, begin, url_len) || - url_parse::DoesBeginUNCPath(url, begin, url_len, true)) - return true; -#endif // WIN32 - - // See if we've got a scheme, if not, we know this is a relative URL. - // BUT: Just because we have a scheme, doesn't make it absolute. - // "http:foo.html" is a relative URL with path "foo.html". If the scheme is - // empty, we treat it as relative (":foo") like IE does. - url_parse::Component scheme; - if (!url_parse::ExtractScheme(url, url_len, &scheme) || scheme.len == 0) { - // Don't allow relative URLs if the base scheme doesn't support it. - if (!is_base_hierarchical) - return false; - - *relative_component = url_parse::MakeRange(begin, url_len); - *is_relative = true; - return true; - } - - // If the scheme isn't valid, then it's relative. - int scheme_end = scheme.end(); - for (int i = scheme.begin; i < scheme_end; i++) { - if (!CanonicalSchemeChar(url[i])) { - *relative_component = url_parse::MakeRange(begin, url_len); - *is_relative = true; - return true; - } - } - - // If the scheme is not the same, then we can't count it as relative. - if (!AreSchemesEqual(base, base_parsed.scheme, url, scheme)) - return true; - - // When the scheme that they both share is not hierarchical, treat the - // incoming scheme as absolute (this way with the base of "data:foo", - // "data:bar" will be reported as absolute. - if (!is_base_hierarchical) - return true; - - int colon_offset = scheme.end(); - - // If it's a filesystem URL, the only valid way to make it relative is not to - // supply a scheme. There's no equivalent to e.g. http:index.html. - if (url_util::CompareSchemeComponent(url, scheme, "filesystem")) - return true; - - // ExtractScheme guarantees that the colon immediately follows what it - // considers to be the scheme. CountConsecutiveSlashes will handle the - // case where the begin offset is the end of the input. - int num_slashes = url_parse::CountConsecutiveSlashes(url, colon_offset + 1, - url_len); - - if (num_slashes == 0 || num_slashes == 1) { - // No slashes means it's a relative path like "http:foo.html". One slash - // is an absolute path. "http:/home/foo.html" - *is_relative = true; - *relative_component = url_parse::MakeRange(colon_offset + 1, url_len); - return true; - } - - // Two or more slashes after the scheme we treat as absolute. - return true; -} - -// Copies all characters in the range [begin, end) of |spec| to the output, -// up until and including the last slash. There should be a slash in the -// range, if not, nothing will be copied. -// -// The input is assumed to be canonical, so we search only for exact slashes -// and not backslashes as well. We also know that it's ASCII. -void CopyToLastSlash(const char* spec, - int begin, - int end, - CanonOutput* output) { - // Find the last slash. - int last_slash = -1; - for (int i = end - 1; i >= begin; i--) { - if (spec[i] == '/') { - last_slash = i; - break; - } - } - if (last_slash < 0) - return; // No slash. - - // Copy. - for (int i = begin; i <= last_slash; i++) - output->push_back(spec[i]); -} - -// Copies a single component from the source to the output. This is used -// when resolving relative URLs and a given component is unchanged. Since the -// source should already be canonical, we don't have to do anything special, -// and the input is ASCII. -void CopyOneComponent(const char* source, - const url_parse::Component& source_component, - CanonOutput* output, - url_parse::Component* output_component) { - if (source_component.len < 0) { - // This component is not present. - *output_component = url_parse::Component(); - return; - } - - output_component->begin = output->length(); - int source_end = source_component.end(); - for (int i = source_component.begin; i < source_end; i++) - output->push_back(source[i]); - output_component->len = output->length() - output_component->begin; -} - -#ifdef WIN32 - -// Called on Windows when the base URL is a file URL, this will copy the "C:" -// to the output, if there is a drive letter and if that drive letter is not -// being overridden by the relative URL. Otherwise, do nothing. -// -// It will return the index of the beginning of the next character in the -// base to be processed: if there is a "C:", the slash after it, or if -// there is no drive letter, the slash at the beginning of the path, or -// the end of the base. This can be used as the starting offset for further -// path processing. -template -int CopyBaseDriveSpecIfNecessary(const char* base_url, - int base_path_begin, - int base_path_end, - const CHAR* relative_url, - int path_start, - int relative_url_len, - CanonOutput* output) { - if (base_path_begin >= base_path_end) - return base_path_begin; // No path. - - // If the relative begins with a drive spec, don't do anything. The existing - // drive spec in the base will be replaced. - if (url_parse::DoesBeginWindowsDriveSpec(relative_url, - path_start, relative_url_len)) { - return base_path_begin; // Relative URL path is "C:/foo" - } - - // The path should begin with a slash (as all canonical paths do). We check - // if it is followed by a drive letter and copy it. - if (DoesBeginSlashWindowsDriveSpec(base_url, - base_path_begin, - base_path_end)) { - // Copy the two-character drive spec to the output. It will now look like - // "file:///C:" so the rest of it can be treated like a standard path. - output->push_back('/'); - output->push_back(base_url[base_path_begin + 1]); - output->push_back(base_url[base_path_begin + 2]); - return base_path_begin + 3; - } - - return base_path_begin; -} - -#endif // WIN32 - -// A subroutine of DoResolveRelativeURL, this resolves the URL knowning that -// the input is a relative path or less (qyuery or ref). -template -bool DoResolveRelativePath(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const CHAR* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed) { - bool success = true; - - // We know the authority section didn't change, copy it to the output. We - // also know we have a path so can copy up to there. - url_parse::Component path, query, ref; - url_parse::ParsePathInternal(relative_url, - relative_component, - &path, - &query, - &ref); - // Canonical URLs always have a path, so we can use that offset. - output->Append(base_url, base_parsed.path.begin); - - if (path.len > 0) { - // The path is replaced or modified. - int true_path_begin = output->length(); - - // For file: URLs on Windows, we don't want to treat the drive letter and - // colon as part of the path for relative file resolution when the - // incoming URL does not provide a drive spec. We save the true path - // beginning so we can fix it up after we are done. - int base_path_begin = base_parsed.path.begin; -#ifdef WIN32 - if (base_is_file) { - base_path_begin = CopyBaseDriveSpecIfNecessary( - base_url, base_parsed.path.begin, base_parsed.path.end(), - relative_url, relative_component.begin, relative_component.end(), - output); - // Now the output looks like either "file://" or "file:///C:" - // and we can start appending the rest of the path. |base_path_begin| - // points to the character in the base that comes next. - } -#endif // WIN32 - - if (url_parse::IsURLSlash(relative_url[path.begin])) { - // Easy case: the path is an absolute path on the server, so we can - // just replace everything from the path on with the new versions. - // Since the input should be canonical hierarchical URL, we should - // always have a path. - success &= CanonicalizePath(relative_url, path, - output, &out_parsed->path); - } else { - // Relative path, replace the query, and reference. We take the - // original path with the file part stripped, and append the new path. - // The canonicalizer will take care of resolving ".." and "." - int path_begin = output->length(); - CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(), - output); - success &= CanonicalizePartialPath(relative_url, path, path_begin, - output); - out_parsed->path = url_parse::MakeRange(path_begin, output->length()); - - // Copy the rest of the stuff after the path from the relative path. - } - - // Finish with the query and reference part (these can't fail). - CanonicalizeQuery(relative_url, query, query_converter, - output, &out_parsed->query); - CanonicalizeRef(relative_url, ref, output, &out_parsed->ref); - - // Fix the path beginning to add back the "C:" we may have written above. - out_parsed->path = url_parse::MakeRange(true_path_begin, - out_parsed->path.end()); - return success; - } - - // If we get here, the path is unchanged: copy to output. - CopyOneComponent(base_url, base_parsed.path, output, &out_parsed->path); - - if (query.is_valid()) { - // Just the query specified, replace the query and reference (ignore - // failures for refs) - CanonicalizeQuery(relative_url, query, query_converter, - output, &out_parsed->query); - CanonicalizeRef(relative_url, ref, output, &out_parsed->ref); - return success; - } - - // If we get here, the query is unchanged: copy to output. Note that the - // range of the query parameter doesn't include the question mark, so we - // have to add it manually if there is a component. - if (base_parsed.query.is_valid()) - output->push_back('?'); - CopyOneComponent(base_url, base_parsed.query, output, &out_parsed->query); - - if (ref.is_valid()) { - // Just the reference specified: replace it (ignoring failures). - CanonicalizeRef(relative_url, ref, output, &out_parsed->ref); - return success; - } - - // We should always have something to do in this function, the caller checks - // that some component is being replaced. - DCHECK(false) << "Not reached"; - return success; -} - -// Resolves a relative URL that contains a host. Typically, these will -// be of the form "//www.google.com/foo/bar?baz#ref" and the only thing which -// should be kept from the original URL is the scheme. -template -bool DoResolveRelativeHost(const char* base_url, - const url_parse::Parsed& base_parsed, - const CHAR* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed) { - // Parse the relative URL, just like we would for anything following a - // scheme. - url_parse::Parsed relative_parsed; // Everything but the scheme is valid. - url_parse::ParseAfterScheme(&relative_url[relative_component.begin], - relative_component.len, relative_component.begin, - &relative_parsed); - - // Now we can just use the replacement function to replace all the necessary - // parts of the old URL with the new one. - Replacements replacements; - replacements.SetUsername(relative_url, relative_parsed.username); - replacements.SetPassword(relative_url, relative_parsed.password); - replacements.SetHost(relative_url, relative_parsed.host); - replacements.SetPort(relative_url, relative_parsed.port); - replacements.SetPath(relative_url, relative_parsed.path); - replacements.SetQuery(relative_url, relative_parsed.query); - replacements.SetRef(relative_url, relative_parsed.ref); - - return ReplaceStandardURL(base_url, base_parsed, replacements, - query_converter, output, out_parsed); -} - -// Resolves a relative URL that happens to be an absolute file path. Examples -// include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo". -template -bool DoResolveAbsoluteFile(const CHAR* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed) { - // Parse the file URL. The file URl parsing function uses the same logic - // as we do for determining if the file is absolute, in which case it will - // not bother to look for a scheme. - url_parse::Parsed relative_parsed; - url_parse::ParseFileURL(&relative_url[relative_component.begin], - relative_component.len, &relative_parsed); - - return CanonicalizeFileURL(&relative_url[relative_component.begin], - relative_component.len, relative_parsed, - query_converter, output, out_parsed); -} - -// TODO(brettw) treat two slashes as root like Mozilla for FTP? -template -bool DoResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const CHAR* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed) { - // Starting point for our output parsed. We'll fix what we change. - *out_parsed = base_parsed; - - // Sanity check: the input should have a host or we'll break badly below. - // We can only resolve relative URLs with base URLs that have hosts and - // paths (even the default path of "/" is OK). - // - // We allow hosts with no length so we can handle file URLs, for example. - if (base_parsed.path.len <= 0) { - // On error, return the input (resolving a relative URL on a non-relative - // base = the base). - int base_len = base_parsed.Length(); - for (int i = 0; i < base_len; i++) - output->push_back(base_url[i]); - return false; - } - - if (relative_component.len <= 0) { - // Empty relative URL, leave unchanged, only removing the ref component. - int base_len = base_parsed.Length(); - base_len -= base_parsed.ref.len + 1; - out_parsed->ref.reset(); - output->Append(base_url, base_len); - return true; - } - - int num_slashes = url_parse::CountConsecutiveSlashes( - relative_url, relative_component.begin, relative_component.end()); - -#ifdef WIN32 - // On Windows, two slashes for a file path (regardless of which direction - // they are) means that it's UNC. Two backslashes on any base scheme mean - // that it's an absolute UNC path (we use the base_is_file flag to control - // how strict the UNC finder is). - // - // We also allow Windows absolute drive specs on any scheme (for example - // "c:\foo") like IE does. There must be no preceeding slashes in this - // case (we reject anything like "/c:/foo") because that should be treated - // as a path. For file URLs, we allow any number of slashes since that would - // be setting the path. - // - // This assumes the absolute path resolver handles absolute URLs like this - // properly. url_util::DoCanonicalize does this. - int after_slashes = relative_component.begin + num_slashes; - if (url_parse::DoesBeginUNCPath(relative_url, relative_component.begin, - relative_component.end(), !base_is_file) || - ((num_slashes == 0 || base_is_file) && - url_parse::DoesBeginWindowsDriveSpec(relative_url, after_slashes, - relative_component.end()))) { - return DoResolveAbsoluteFile(relative_url, relative_component, - query_converter, output, out_parsed); - } -#else - // Other platforms need explicit handling for file: URLs with multiple - // slashes because the generic scheme parsing always extracts a host, but a - // file: URL only has a host if it has exactly 2 slashes. This also - // handles the special case where the URL is only slashes, since that - // doesn't have a host part either. - if (base_is_file && - (num_slashes > 2 || num_slashes == relative_component.len)) { - return DoResolveAbsoluteFile(relative_url, relative_component, - query_converter, output, out_parsed); - } -#endif - - // Any other double-slashes mean that this is relative to the scheme. - if (num_slashes >= 2) { - return DoResolveRelativeHost(base_url, base_parsed, - relative_url, relative_component, - query_converter, output, out_parsed); - } - - // When we get here, we know that the relative URL is on the same host. - return DoResolveRelativePath(base_url, base_parsed, base_is_file, - relative_url, relative_component, - query_converter, output, out_parsed); -} - -} // namespace - -bool IsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const char* fragment, - int fragment_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component) { - return DoIsRelativeURL( - base, base_parsed, fragment, fragment_len, is_base_hierarchical, - is_relative, relative_component); -} - -bool IsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const char16* fragment, - int fragment_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component) { - return DoIsRelativeURL( - base, base_parsed, fragment, fragment_len, is_base_hierarchical, - is_relative, relative_component); -} - -bool ResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const char* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed) { - return DoResolveRelativeURL( - base_url, base_parsed, base_is_file, relative_url, - relative_component, query_converter, output, out_parsed); -} - -bool ResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const char16* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed) { - return DoResolveRelativeURL( - base_url, base_parsed, base_is_file, relative_url, - relative_component, query_converter, output, out_parsed); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdstring.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdstring.h.svn-base deleted file mode 100644 index 21272e035..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdstring.h.svn-base +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// This header file defines a canonicalizer output method class for STL -// strings. Because the canonicalizer tries not to be dependent on the STL, -// we have segregated it here. - -#ifndef GOOGLEURL_SRC_URL_CANON_STDSTRING_H__ -#define GOOGLEURL_SRC_URL_CANON_STDSTRING_H__ - -#include -#include "googleurl/src/url_canon.h" - -namespace url_canon { - -// Write into a std::string given in the constructor. This object does not own -// the string itself, and the user must ensure that the string stays alive -// throughout the lifetime of this object. -// -// The given string will be appended to; any existing data in the string will -// be preserved. The caller should reserve() the amount of data in the string -// they expect to be written. We will resize if necessary, but that's slow. -// -// Note that when canonicalization is complete, the string will likely have -// unused space at the end because we make the string very big to start out -// with (by |initial_size|). This ends up being important because resize -// operations are slow, and because the base class needs to write directly -// into the buffer. -// -// Therefore, the user should call Complete() before using the string that -// this class wrote into. -class StdStringCanonOutput : public CanonOutput { - public: - StdStringCanonOutput(std::string* str) - : CanonOutput(), - str_(str) { - cur_len_ = static_cast(str_->size()); // Append to existing data. - str_->resize(str_->capacity()); - buffer_ = str_->empty() ? NULL : &(*str_)[0]; - buffer_len_ = static_cast(str_->size()); - } - virtual ~StdStringCanonOutput() { - // Nothing to do, we don't own the string. - } - - // Must be called after writing has completed but before the string is used. - void Complete() { - str_->resize(cur_len_); - buffer_len_ = cur_len_; - } - - virtual void Resize(int sz) { - str_->resize(sz); - buffer_ = str_->empty() ? NULL : &(*str_)[0]; - buffer_len_ = sz; - } - - protected: - std::string* str_; -}; - -// An extension of the Replacements class that allows the setters to use -// standard strings. -// -// The strings passed as arguments are not copied and must remain valid until -// this class goes out of scope. -template -class StdStringReplacements : - public url_canon::Replacements { - public: - void SetSchemeStr(const STR& s) { - this->SetScheme(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetUsernameStr(const STR& s) { - this->SetUsername(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetPasswordStr(const STR& s) { - this->SetPassword(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetHostStr(const STR& s) { - this->SetHost(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetPortStr(const STR& s) { - this->SetPort(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetPathStr(const STR& s) { - this->SetPath(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetQueryStr(const STR& s) { - this->SetQuery(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } - void SetRefStr(const STR& s) { - this->SetRef(s.data(), - url_parse::Component(0, static_cast(s.length()))); - } -}; - -} // namespace url_canon - -#endif // GOOGLEURL_SRC_URL_CANON_STDSTRING_H__ - diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdurl.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdurl.cc.svn-base deleted file mode 100644 index 1e21a147c..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_stdurl.cc.svn-base +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Functions to canonicalize "standard" URLs, which are ones that have an -// authority section including a host name. - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_internal.h" - -namespace url_canon { - -namespace { - -template -bool DoCanonicalizeStandardURL(const URLComponentSource& source, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - // Scheme: this will append the colon. - bool success = CanonicalizeScheme(source.scheme, parsed.scheme, - output, &new_parsed->scheme); - - // Authority (username, password, host, port) - bool have_authority; - if (parsed.username.is_valid() || parsed.password.is_valid() || - parsed.host.is_nonempty() || parsed.port.is_valid()) { - have_authority = true; - - // Only write the authority separators when we have a scheme. - if (parsed.scheme.is_valid()) { - output->push_back('/'); - output->push_back('/'); - } - - // User info: the canonicalizer will handle the : and @. - success &= CanonicalizeUserInfo(source.username, parsed.username, - source.password, parsed.password, - output, - &new_parsed->username, - &new_parsed->password); - - success &= CanonicalizeHost(source.host, parsed.host, - output, &new_parsed->host); - - // Host must not be empty for standard URLs. - if (!parsed.host.is_nonempty()) - success = false; - - // Port: the port canonicalizer will handle the colon. - int default_port = DefaultPortForScheme( - &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len); - success &= CanonicalizePort(source.port, parsed.port, default_port, - output, &new_parsed->port); - } else { - // No authority, clear the components. - have_authority = false; - new_parsed->host.reset(); - new_parsed->username.reset(); - new_parsed->password.reset(); - new_parsed->port.reset(); - success = false; // Standard URLs must have an authority. - } - - // Path - if (parsed.path.is_valid()) { - success &= CanonicalizePath(source.path, parsed.path, - output, &new_parsed->path); - } else if (have_authority || - parsed.query.is_valid() || parsed.ref.is_valid()) { - // When we have an empty path, make up a path when we have an authority - // or something following the path. The only time we allow an empty - // output path is when there is nothing else. - new_parsed->path = url_parse::Component(output->length(), 1); - output->push_back('/'); - } else { - // No path at all - new_parsed->path.reset(); - } - - // Query - CanonicalizeQuery(source.query, parsed.query, query_converter, - output, &new_parsed->query); - - // Ref: ignore failure for this, since the page can probably still be loaded. - CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); - - return success; -} - -} // namespace - - -// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED -// if the scheme is unknown. -int DefaultPortForScheme(const char* scheme, int scheme_len) { - int default_port = url_parse::PORT_UNSPECIFIED; - switch (scheme_len) { - case 4: - if (!strncmp(scheme, "http", scheme_len)) - default_port = 80; - break; - case 5: - if (!strncmp(scheme, "https", scheme_len)) - default_port = 443; - break; - case 3: - if (!strncmp(scheme, "ftp", scheme_len)) - default_port = 21; - else if (!strncmp(scheme, "wss", scheme_len)) - default_port = 443; - break; - case 6: - if (!strncmp(scheme, "gopher", scheme_len)) - default_port = 70; - break; - case 2: - if (!strncmp(scheme, "ws", scheme_len)) - default_port = 80; - break; - } - return default_port; -} - -bool CanonicalizeStandardURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeStandardURL( - URLComponentSource(spec), parsed, query_converter, - output, new_parsed); -} - -bool CanonicalizeStandardURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - return DoCanonicalizeStandardURL( - URLComponentSource(spec), parsed, query_converter, - output, new_parsed); -} - -// It might be nice in the future to optimize this so unchanged components don't -// need to be recanonicalized. This is especially true since the common case for -// ReplaceComponents is removing things we don't want, like reference fragments -// and usernames. These cases can become more efficient if we can assume the -// rest of the URL is OK with these removed (or only the modified parts -// recanonicalized). This would be much more complex to implement, however. -// -// You would also need to update DoReplaceComponents in url_util.cc which -// relies on this re-checking everything (see the comment there for why). -bool ReplaceStandardURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupOverrideComponents(base, replacements, &source, &parsed); - return DoCanonicalizeStandardURL( - source, parsed, query_converter, output, new_parsed); -} - -// For 16-bit replacements, we turn all the replacements into UTF-8 so the -// regular codepath can be used. -bool ReplaceStandardURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed) { - RawCanonOutput<1024> utf8; - URLComponentSource source(base); - url_parse::Parsed parsed(base_parsed); - SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); - return DoCanonicalizeStandardURL( - source, parsed, query_converter, output, new_parsed); -} - -} // namespace url_canon diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_unittest.cc.svn-base deleted file mode 100644 index 0c57f55e1..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_canon_unittest.cc.svn-base +++ /dev/null @@ -1,2133 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_icu.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_canon_stdstring.h" -#include "googleurl/src/url_parse.h" -#include "googleurl/src/url_test_utils.h" -#include "testing/gtest/include/gtest/gtest.h" - -// Some implementations of base/basictypes.h may define ARRAYSIZE. -// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro -// which is in our version of basictypes.h. -#ifndef ARRAYSIZE -#define ARRAYSIZE ARRAYSIZE_UNSAFE -#endif - -using url_test_utils::WStringToUTF16; -using url_test_utils::ConvertUTF8ToUTF16; -using url_test_utils::ConvertUTF16ToUTF8; -using url_canon::CanonHostInfo; - -namespace { - -struct ComponentCase { - const char* input; - const char* expected; - url_parse::Component expected_component; - bool expected_success; -}; - -// ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests -// treat each input as optional, and will only try processing if non-NULL. -// The output is always 8-bit. -struct DualComponentCase { - const char* input8; - const wchar_t* input16; - const char* expected; - url_parse::Component expected_component; - bool expected_success; -}; - -// Test cases for CanonicalizeIPAddress(). The inputs are identical to -// DualComponentCase, but the output has extra CanonHostInfo fields. -struct IPAddressCase { - const char* input8; - const wchar_t* input16; - const char* expected; - url_parse::Component expected_component; - - // CanonHostInfo fields, for verbose output. - CanonHostInfo::Family expected_family; - int expected_num_ipv4_components; - const char* expected_address_hex; // Two hex chars per IP address byte. -}; - -std::string BytesToHexString(unsigned char bytes[16], int length) { - EXPECT_TRUE(length == 0 || length == 4 || length == 16) - << "Bad IP address length: " << length; - std::string result; - for (int i = 0; i < length; ++i) { - result.push_back(url_canon::kHexCharLookup[(bytes[i] >> 4) & 0xf]); - result.push_back(url_canon::kHexCharLookup[bytes[i] & 0xf]); - } - return result; -} - -struct ReplaceCase { - const char* base; - const char* scheme; - const char* username; - const char* password; - const char* host; - const char* port; - const char* path; - const char* query; - const char* ref; - const char* expected; -}; - -// Wrapper around a UConverter object that managers creation and destruction. -class UConvScoper { - public: - explicit UConvScoper(const char* charset_name) { - UErrorCode err = U_ZERO_ERROR; - converter_ = ucnv_open(charset_name, &err); - } - - ~UConvScoper() { - if (converter_) - ucnv_close(converter_); - } - - // Returns the converter object, may be NULL. - UConverter* converter() const { return converter_; } - - private: - UConverter* converter_; -}; - -// Magic string used in the replacements code that tells SetupReplComp to -// call the clear function. -const char kDeleteComp[] = "|"; - -// Sets up a replacement for a single component. This is given pointers to -// the set and clear function for the component being replaced, and will -// either set the component (if it exists) or clear it (if the replacement -// string matches kDeleteComp). -// -// This template is currently used only for the 8-bit case, and the strlen -// causes it to fail in other cases. It is left a template in case we have -// tests for wide replacements. -template -void SetupReplComp( - void (url_canon::Replacements::*set)(const CHAR*, - const url_parse::Component&), - void (url_canon::Replacements::*clear)(), - url_canon::Replacements* rep, - const CHAR* str) { - if (str && str[0] == kDeleteComp[0]) { - (rep->*clear)(); - } else if (str) { - (rep->*set)(str, url_parse::Component(0, static_cast(strlen(str)))); - } -} - -} // namespace - -TEST(URLCanonTest, DoAppendUTF8) { - struct UTF8Case { - unsigned input; - const char* output; - } utf_cases[] = { - // Valid code points. - {0x24, "\x24"}, - {0xA2, "\xC2\xA2"}, - {0x20AC, "\xE2\x82\xAC"}, - {0x24B62, "\xF0\xA4\xAD\xA2"}, - {0x10FFFF, "\xF4\x8F\xBF\xBF"}, - }; - std::string out_str; - for (size_t i = 0; i < ARRAYSIZE(utf_cases); i++) { - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - url_canon::AppendUTF8Value(utf_cases[i].input, &output); - output.Complete(); - EXPECT_EQ(utf_cases[i].output, out_str); - } -} - -// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will -// cause the Chromium stacktrace dialog to appear and hang the test. -// See http://crbug.com/49580. -#if defined(GTEST_HAS_DEATH_TEST) && defined(NDEBUG) -TEST(URLCanonTest, DoAppendUTF8Invalid) { - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - // Invalid code point (too large). - ASSERT_DEBUG_DEATH({ - url_canon::AppendUTF8Value(0x110000, &output); - output.Complete(); - EXPECT_EQ("", out_str); - }, ""); -} -#endif - -TEST(URLCanonTest, UTF) { - // Low-level test that we handle reading, canonicalization, and writing - // UTF-8/UTF-16 strings properly. - struct UTFCase { - const char* input8; - const wchar_t* input16; - bool expected_success; - const char* output; - } utf_cases[] = { - // Valid canonical input should get passed through & escaped. - {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"}, - // Test a characer that takes > 16 bits (U+10300 = old italic letter A) - {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"}, - // Non-shortest-form UTF-8 are invalid. The bad char should be replaced - // with the invalid character (EF BF DB in UTF-8). - {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"}, - // Invalid UTF-8 sequences should be marked as invalid (the first - // sequence is truncated). - {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"}, - // Character going off the end. - {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"}, - // ...same with low surrogates with no high surrogate. - {"\xed\xb0\x80", L"\xdc00", false, "%EF%BF%BD"}, - // Test a UTF-8 encoded surrogate value is marked as invalid. - // ED A0 80 = U+D800 - {"\xed\xa0\x80", NULL, false, "%EF%BF%BD"}, - }; - - std::string out_str; - for (size_t i = 0; i < ARRAYSIZE(utf_cases); i++) { - if (utf_cases[i].input8) { - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - - int input_len = static_cast(strlen(utf_cases[i].input8)); - bool success = true; - for (int ch = 0; ch < input_len; ch++) { - success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len, - &output); - } - output.Complete(); - EXPECT_EQ(utf_cases[i].expected_success, success); - EXPECT_EQ(std::string(utf_cases[i].output), out_str); - } - if (utf_cases[i].input16) { - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - - string16 input_str(WStringToUTF16(utf_cases[i].input16)); - int input_len = static_cast(input_str.length()); - bool success = true; - for (int ch = 0; ch < input_len; ch++) { - success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len, - &output); - } - output.Complete(); - EXPECT_EQ(utf_cases[i].expected_success, success); - EXPECT_EQ(std::string(utf_cases[i].output), out_str); - } - - if (utf_cases[i].input8 && utf_cases[i].input16 && - utf_cases[i].expected_success) { - // Check that the UTF-8 and UTF-16 inputs are equivalent. - - // UTF-16 -> UTF-8 - std::string input8_str(utf_cases[i].input8); - string16 input16_str(WStringToUTF16(utf_cases[i].input16)); - EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str)); - - // UTF-8 -> UTF-16 - EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str)); - } - } -} - -TEST(URLCanonTest, ICUCharsetConverter) { - struct ICUCase { - const wchar_t* input; - const char* encoding; - const char* expected; - } icu_cases[] = { - // UTF-8. - {L"Hello, world", "utf-8", "Hello, world"}, - {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, - // Non-BMP UTF-8. - {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, - // Big5 - {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, - // Unrepresentable character in the destination set. - {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) { - UConvScoper conv(icu_cases[i].encoding); - ASSERT_TRUE(conv.converter() != NULL); - url_canon::ICUCharsetConverter converter(conv.converter()); - - std::string str; - url_canon::StdStringCanonOutput output(&str); - - string16 input_str(WStringToUTF16(icu_cases[i].input)); - int input_len = static_cast(input_str.length()); - converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); - output.Complete(); - - EXPECT_STREQ(icu_cases[i].expected, str.c_str()); - } - - // Test string sizes around the resize boundary for the output to make sure - // the converter resizes as needed. - const int static_size = 16; - UConvScoper conv("utf-8"); - ASSERT_TRUE(conv.converter()); - url_canon::ICUCharsetConverter converter(conv.converter()); - for (int i = static_size - 2; i <= static_size + 2; i++) { - // Make a string with the appropriate length. - string16 input; - for (int ch = 0; ch < i; ch++) - input.push_back('a'); - - url_canon::RawCanonOutput output; - converter.ConvertFromUTF16(input.c_str(), static_cast(input.length()), - &output); - EXPECT_EQ(input.length(), static_cast(output.length())); - } -} - -TEST(URLCanonTest, Scheme) { - // Here, we're mostly testing that unusual characters are handled properly. - // The canonicalizer doesn't do any parsing or whitespace detection. It will - // also do its best on error, and will escape funny sequences (these won't be - // valid schemes and it will return error). - // - // Note that the canonicalizer will append a colon to the output to separate - // out the rest of the URL, which is not present in the input. We check, - // however, that the output range includes everything but the colon. - ComponentCase scheme_cases[] = { - {"http", "http:", url_parse::Component(0, 4), true}, - {"HTTP", "http:", url_parse::Component(0, 4), true}, - {" HTTP ", "%20http%20:", url_parse::Component(0, 10), false}, - {"htt: ", "htt%3A%20:", url_parse::Component(0, 9), false}, - {"\xe4\xbd\xa0\xe5\xa5\xbdhttp", "%E4%BD%A0%E5%A5%BDhttp:", url_parse::Component(0, 22), false}, - // Don't re-escape something already escaped. Note that it will - // "canonicalize" the 'A' to 'a', but that's OK. - {"ht%3Atp", "ht%3atp:", url_parse::Component(0, 7), false}, - }; - - std::string out_str; - - for (size_t i = 0; i < arraysize(scheme_cases); i++) { - int url_len = static_cast(strlen(scheme_cases[i].input)); - url_parse::Component in_comp(0, url_len); - url_parse::Component out_comp; - - out_str.clear(); - url_canon::StdStringCanonOutput output1(&out_str); - bool success = url_canon::CanonicalizeScheme(scheme_cases[i].input, - in_comp, &output1, &out_comp); - output1.Complete(); - - EXPECT_EQ(scheme_cases[i].expected_success, success); - EXPECT_EQ(std::string(scheme_cases[i].expected), out_str); - EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len); - - // Now try the wide version - out_str.clear(); - url_canon::StdStringCanonOutput output2(&out_str); - - string16 wide_input(ConvertUTF8ToUTF16(scheme_cases[i].input)); - in_comp.len = static_cast(wide_input.length()); - success = url_canon::CanonicalizeScheme(wide_input.c_str(), in_comp, - &output2, &out_comp); - output2.Complete(); - - EXPECT_EQ(scheme_cases[i].expected_success, success); - EXPECT_EQ(std::string(scheme_cases[i].expected), out_str); - EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len); - } - - // Test the case where the scheme is declared nonexistant, it should be - // converted into an empty scheme. - url_parse::Component out_comp; - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - - EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1), - &output, &out_comp)); - output.Complete(); - - EXPECT_EQ(std::string(":"), out_str); - EXPECT_EQ(0, out_comp.begin); - EXPECT_EQ(0, out_comp.len); -} - -TEST(URLCanonTest, Host) { - IPAddressCase host_cases[] = { - // Basic canonicalization, uppercase should be converted to lowercase. - {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, - // Spaces and some other characters should be escaped. - {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_parse::Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""}, - // Exciting different types of spaces! - {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""}, - // Other types of space (no-break, zero-width, zero-width-no-break) are - // name-prepped away to nothing. - {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, - // Ideographic full stop (full-width period for Chinese, etc.) should be - // treated as a dot. - {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid unicode characters should fail... - // ...In wide input, ICU will barf and we'll end up with the input as - // escaped UTF-8 (the invalid character should be replaced with the - // replacement character). - {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, - // ...This is the same as previous but with with escaped. - {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, - // Test name prepping, fullwidth input should be converted to ASCII and NOT - // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16. - {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""}, - // Test that fullwidth escaped values are properly name-prepped, - // then converted or rejected. - // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) - {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.com", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, - {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, - // ...%00 in fullwidth should fail (also as escaped UTF-8 input) - {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, - {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, - // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN - {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, - // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped - // UTF-8 (wide case). The output should be equivalent to the true wide - // character input above). - {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid escaped characters should fail and the percents should be - // escaped. - {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), CanonHostInfo::BROKEN, -1, ""}, - // If we get an invalid character that has been escaped. - {"%25", L"%25", "%25", url_parse::Component(0, 3), CanonHostInfo::BROKEN, -1, ""}, - {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), CanonHostInfo::BROKEN, -1, ""}, - // Escaped numbers should be treated like IP addresses if they are. - {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - // Invalid escaping should trigger the regular host error handling. - {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1, ""}, - // Something that isn't exactly an IP should get treated as a host and - // spaces escaped. - {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse::Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, - // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. - // These are "0Xc0.0250.01" in fullwidth. - {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - // Broken IP addresses get marked as such. - {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0, 13), CanonHostInfo::BROKEN, -1, ""}, - {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12), CanonHostInfo::BROKEN, -1, ""}, - // Cyrillic letter followed buy ( should return punicode for ( escaped before punicode string was created. I.e. - // if ( is escaped after punicode is created we would get xn--%28-8tb (incorrect). - {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", url_parse::Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""}, - // Address with all hexidecimal characters with leading number of 1<<32 - // or greater and should return NEUTRAL rather than BROKEN if not all - // components are numbers. - {"12345678912345.de", L"12345678912345.de", "12345678912345.de", url_parse::Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""}, - {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", url_parse::Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, - {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", url_parse::Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""}, - {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", url_parse::Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, - {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", url_parse::Component(0, 19), CanonHostInfo::BROKEN, -1, ""}, - }; - - // CanonicalizeHost() non-verbose. - std::string out_str; - for (size_t i = 0; i < arraysize(host_cases); i++) { - // Narrow version. - if (host_cases[i].input8) { - int host_len = static_cast(strlen(host_cases[i].input8)); - url_parse::Component in_comp(0, host_len); - url_parse::Component out_comp; - - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - - bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp, - &output, &out_comp); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, - success); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); - } - - // Wide version. - if (host_cases[i].input16) { - string16 input16(WStringToUTF16(host_cases[i].input16)); - int host_len = static_cast(input16.length()); - url_parse::Component in_comp(0, host_len); - url_parse::Component out_comp; - - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - - bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp, - &output, &out_comp); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, - success); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); - } - } - - // CanonicalizeHostVerbose() - for (size_t i = 0; i < arraysize(host_cases); i++) { - // Narrow version. - if (host_cases[i].input8) { - int host_len = static_cast(strlen(host_cases[i].input8)); - url_parse::Component in_comp(0, host_len); - - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - CanonHostInfo host_info; - - url_canon::CanonicalizeHostVerbose(host_cases[i].input8, in_comp, - &output, &host_info); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, - host_info.out_host.begin); - EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(std::string(host_cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_cases[i].expected_family == CanonHostInfo::IPV4) { - EXPECT_EQ(host_cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - } - - // Wide version. - if (host_cases[i].input16) { - string16 input16(WStringToUTF16(host_cases[i].input16)); - int host_len = static_cast(input16.length()); - url_parse::Component in_comp(0, host_len); - - out_str.clear(); - url_canon::StdStringCanonOutput output(&out_str); - CanonHostInfo host_info; - - url_canon::CanonicalizeHostVerbose(input16.c_str(), in_comp, - &output, &host_info); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, - host_info.out_host.begin); - EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(std::string(host_cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_cases[i].expected_family == CanonHostInfo::IPV4) { - EXPECT_EQ(host_cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - } - } -} - -TEST(URLCanonTest, IPv4) { - IPAddressCase cases[] = { - // Empty is not an IP address. - {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {".", L".", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Regular IP addresses in different bases. - {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - // Non-IP addresses due to invalid characters. - {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid characters for the base should be rejected. - {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // If there are not enough components, the last one should fill them out. - {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"}, - {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, - {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, - {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, - {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2, "C00000A8"}, - {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"}, - {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"}, - {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - // Too many components means not an IP address. - {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // We allow a single trailing dot. - {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Two dots in a row means not an IP address. - {"192.168..1", L"192.168..1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Any numerical overflow should be marked as BROKEN. - {"0x100.0", L"0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0x100.0.0", L"0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0x100.0.0.0", L"0x100.0.0.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0x100.0.0", L"0.0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0.0x100.0", L"0.0.0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0.0.0x100", L"0.0.0.0x100", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0.0x10000", L"0.0.0x10000", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0x1000000", L"0.0x1000000", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0x100000000", L"0x100000000", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Repeat the previous tests, minus 1, to verify boundaries. - {"0xFF.0", L"0xFF.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"}, - {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"}, - {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"}, - {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4, "00FF0000"}, - {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4, "0000FF00"}, - {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4, "000000FF"}, - {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"}, - {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", url_parse::Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"}, - {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", url_parse::Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"}, - // Old trunctations tests. They're all "BROKEN" now. - {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"192.168.0.257", L"192.168.0.257", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"192.168.0xa20001", L"192.168.0xa20001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"192.015052000001", L"192.015052000001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0X12C0a80001", L"0X12C0a80001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"276.1.2", L"276.1.2", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Spaces should be rejected. - {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Very large numbers. - {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"}, - {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", url_parse::Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, - // A number has no length limit, but long numbers can still overflow. - {"00000000000000000001", L"00000000000000000001", "0.0.0.1", url_parse::Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"}, - {"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // If a long component is non-numeric, it's a hostname, *not* a broken IP. - {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Truncation of all zeros should still result in 0. - {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", url_parse::Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - // 8-bit version. - url_parse::Component component(0, - static_cast(strlen(cases[i].input8))); - - std::string out_str1; - url_canon::StdStringCanonOutput output1(&out_str1); - url_canon::CanonHostInfo host_info; - url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1, - &host_info); - output1.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_info.family == CanonHostInfo::IPV4) { - EXPECT_STREQ(cases[i].expected, out_str1.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - - // 16-bit version. - string16 input16(WStringToUTF16(cases[i].input16)); - component = url_parse::Component(0, static_cast(input16.length())); - - std::string out_str2; - url_canon::StdStringCanonOutput output2(&out_str2); - url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2, - &host_info); - output2.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_info.family == CanonHostInfo::IPV4) { - EXPECT_STREQ(cases[i].expected, out_str2.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - } -} - -TEST(URLCanonTest, IPv6) { - IPAddressCase cases[] = { - // Empty is not an IP address. - {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Non-IPs with [:] characters are marked BROKEN. - {":", L":", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[", L"[", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:", L"[:", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"]", L"]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {":]", L":]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[]", L"[]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:]", L"[:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Regular IP address is invalid without bounding '[' and ']'. - {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Regular IP addresses. - {"[::]", L"[::]", "[::]", url_parse::Component(0,4), CanonHostInfo::IPV6, -1, "00000000000000000000000000000000"}, - {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000001"}, - {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1, "00010000000000000000000000000000"}, - - // Leading zeros should be stripped. - {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1, "00000001000200030004000500060007"}, - - // Upper case letters should be lowercased. - {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", url_parse::Component(0,20), CanonHostInfo::IPV6, -1, "000A000B000C00DE00FF0000000100AC"}, - - // The same address can be written with different contractions, but should - // get canonicalized to the same thing. - {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"}, - {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"}, - - // Addresses with embedded IPv4. - {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0,10), CanonHostInfo::IPV6, -1, "000000000000000000000000C0A80001"}, - {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"}, - {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "[::eeee:c0a8:1]", url_parse::Component(0, 15), CanonHostInfo::IPV6, -1, "00000000000000000000EEEEC0A80001"}, - {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]", url_parse::Component(0, 14), CanonHostInfo::IPV6, -1, "200100000000000000000000C0A80001"}, - {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // IPv4 with last component missing. - {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0010002"}, - - // IPv4 using hex. - // TODO(eroman): Should this format be disallowed? - {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"}, - - // There may be zeros surrounding the "::" contraction. - {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000008"}, - - {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"}, - - // Can only have one "::" contraction in an IPv6 string literal. - {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // No more than 2 consecutive ':'s. - {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:::]", L"[:::]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Non-IP addresses due to invalid characters. - {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // If there are not enough components, the last one should fill them out. - // ... omitted at this time ... - // Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses. - {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Too many bits (even though 8 comonents, the last one holds 32 bits). - {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Too many bits specified -- the contraction would have to be zero-length - // to not exceed 128 bits. - {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // The contraction is for 16 bits of zero. - {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1, "00010002000300040005000600000008"}, - - // Cannot have a trailing colon. - {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Cannot have negative numbers. - {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Scope ID -- the URL may contain an optional ["%" ] section. - // The scope_id should be included in the canonicalized URL, and is an - // unsigned decimal number. - - // Invalid because no ID was given after the percent. - - // Don't allow scope-id - {"[1::%1]", L"[1::%1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1::%]", L"[1::%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[%]", L"[%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[::%:]", L"[::%:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Don't allow leading or trailing colons. - {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - - // We allow a single trailing dot. - // ... omitted at this time ... - // Two dots in a row means not an IP address. - {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - // Any non-first components get truncated to one byte. - // ... omitted at this time ... - // Spaces should be rejected. - {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1, ""}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - // 8-bit version. - url_parse::Component component(0, - static_cast(strlen(cases[i].input8))); - - std::string out_str1; - url_canon::StdStringCanonOutput output1(&out_str1); - url_canon::CanonHostInfo host_info; - url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1, - &host_info); - output1.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())) << "iter " << i << " host " << cases[i].input8; - if (host_info.family == CanonHostInfo::IPV6) { - EXPECT_STREQ(cases[i].expected, out_str1.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, - host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - } - - // 16-bit version. - string16 input16(WStringToUTF16(cases[i].input16)); - component = url_parse::Component(0, static_cast(input16.length())); - - std::string out_str2; - url_canon::StdStringCanonOutput output2(&out_str2); - url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2, - &host_info); - output2.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_info.family == CanonHostInfo::IPV6) { - EXPECT_STREQ(cases[i].expected, out_str2.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - } - } -} - -TEST(URLCanonTest, IPEmpty) { - std::string out_str1; - url_canon::StdStringCanonOutput output1(&out_str1); - url_canon::CanonHostInfo host_info; - - // This tests tests. - const char spec[] = "192.168.0.1"; - url_canon::CanonicalizeIPAddress(spec, url_parse::Component(), - &output1, &host_info); - EXPECT_FALSE(host_info.IsIPAddress()); - - url_canon::CanonicalizeIPAddress(spec, url_parse::Component(0, 0), - &output1, &host_info); - EXPECT_FALSE(host_info.IsIPAddress()); -} - -TEST(URLCanonTest, UserInfo) { - // Note that the canonicalizer should escape and treat empty components as - // not being there. - - // We actually parse a full input URL so we can get the initial components. - struct UserComponentCase { - const char* input; - const char* expected; - url_parse::Component expected_username; - url_parse::Component expected_password; - bool expected_success; - } user_info_cases[] = { - {"http://user:pass@host.com/", "user:pass@", url_parse::Component(0, 4), url_parse::Component(5, 4), true}, - {"http://@host.com/", "", url_parse::Component(0, -1), url_parse::Component(0, -1), true}, - {"http://:@host.com/", "", url_parse::Component(0, -1), url_parse::Component(0, -1), true}, - {"http://foo:@host.com/", "foo@", url_parse::Component(0, 3), url_parse::Component(0, -1), true}, - {"http://:foo@host.com/", ":foo@", url_parse::Component(0, 0), url_parse::Component(1, 3), true}, - {"http://^ :$\t@host.com/", "%5E%20:$%09@", url_parse::Component(0, 6), url_parse::Component(7, 4), true}, - {"http://user:pass@/", "user:pass@", url_parse::Component(0, 4), url_parse::Component(5, 4), true}, - {"http://%2540:bar@domain.com/", "%2540:bar@", url_parse::Component(0, 5), url_parse::Component(6, 3), true }, - - // IE7 compatability: old versions allowed backslashes in usernames, but - // IE7 does not. We disallow it as well. - {"ftp://me\\mydomain:pass@foo.com/", "", url_parse::Component(0, -1), url_parse::Component(0, -1), true}, - }; - - for (size_t i = 0; i < ARRAYSIZE(user_info_cases); i++) { - int url_len = static_cast(strlen(user_info_cases[i].input)); - url_parse::Parsed parsed; - url_parse::ParseStandardURL(user_info_cases[i].input, url_len, &parsed); - url_parse::Component out_user, out_pass; - std::string out_str; - url_canon::StdStringCanonOutput output1(&out_str); - - bool success = url_canon::CanonicalizeUserInfo(user_info_cases[i].input, - parsed.username, - user_info_cases[i].input, - parsed.password, - &output1, &out_user, - &out_pass); - output1.Complete(); - - EXPECT_EQ(user_info_cases[i].expected_success, success); - EXPECT_EQ(std::string(user_info_cases[i].expected), out_str); - EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin); - EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len); - EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin); - EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len); - - // Now try the wide version - out_str.clear(); - url_canon::StdStringCanonOutput output2(&out_str); - string16 wide_input(ConvertUTF8ToUTF16(user_info_cases[i].input)); - success = url_canon::CanonicalizeUserInfo(wide_input.c_str(), - parsed.username, - wide_input.c_str(), - parsed.password, - &output2, &out_user, &out_pass); - output2.Complete(); - - EXPECT_EQ(user_info_cases[i].expected_success, success); - EXPECT_EQ(std::string(user_info_cases[i].expected), out_str); - EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin); - EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len); - EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin); - EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len); - } -} - -TEST(URLCanonTest, Port) { - // We only need to test that the number gets properly put into the output - // buffer. The parser unit tests will test scanning the number correctly. - // - // Note that the CanonicalizePort will always prepend a colon to the output - // to separate it from the colon that it assumes preceeds it. - struct PortCase { - const char* input; - int default_port; - const char* expected; - url_parse::Component expected_component; - bool expected_success; - } port_cases[] = { - // Invalid input should be copied w/ failure. - {"as df", 80, ":as%20df", url_parse::Component(1, 7), false}, - {"-2", 80, ":-2", url_parse::Component(1, 2), false}, - // Default port should be omitted. - {"80", 80, "", url_parse::Component(0, -1), true}, - {"8080", 80, ":8080", url_parse::Component(1, 4), true}, - // PORT_UNSPECIFIED should mean always keep the port. - {"80", url_parse::PORT_UNSPECIFIED, ":80", url_parse::Component(1, 2), true}, - }; - - for (size_t i = 0; i < ARRAYSIZE(port_cases); i++) { - int url_len = static_cast(strlen(port_cases[i].input)); - url_parse::Component in_comp(0, url_len); - url_parse::Component out_comp; - std::string out_str; - url_canon::StdStringCanonOutput output1(&out_str); - bool success = url_canon::CanonicalizePort(port_cases[i].input, in_comp, - port_cases[i].default_port, - &output1, &out_comp); - output1.Complete(); - - EXPECT_EQ(port_cases[i].expected_success, success); - EXPECT_EQ(std::string(port_cases[i].expected), out_str); - EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len); - - // Now try the wide version - out_str.clear(); - url_canon::StdStringCanonOutput output2(&out_str); - string16 wide_input(ConvertUTF8ToUTF16(port_cases[i].input)); - success = url_canon::CanonicalizePort(wide_input.c_str(), in_comp, - port_cases[i].default_port, - &output2, &out_comp); - output2.Complete(); - - EXPECT_EQ(port_cases[i].expected_success, success); - EXPECT_EQ(std::string(port_cases[i].expected), out_str); - EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len); - } -} - -TEST(URLCanonTest, Path) { - DualComponentCase path_cases[] = { - // ----- path collapsing tests ----- - {"/././foo", L"/././foo", "/foo", url_parse::Component(0, 4), true}, - {"/./.foo", L"/./.foo", "/.foo", url_parse::Component(0, 5), true}, - {"/foo/.", L"/foo/.", "/foo/", url_parse::Component(0, 5), true}, - {"/foo/./", L"/foo/./", "/foo/", url_parse::Component(0, 5), true}, - // double dots followed by a slash or the end of the string count - {"/foo/bar/..", L"/foo/bar/..", "/foo/", url_parse::Component(0, 5), true}, - {"/foo/bar/../", L"/foo/bar/../", "/foo/", url_parse::Component(0, 5), true}, - // don't count double dots when they aren't followed by a slash - {"/foo/..bar", L"/foo/..bar", "/foo/..bar", url_parse::Component(0, 10), true}, - // some in the middle - {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", url_parse::Component(0, 8), true}, - {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", url_parse::Component(0, 2), true}, - // we should not be able to go above the root - {"/foo/../../..", L"/foo/../../..", "/", url_parse::Component(0, 1), true}, - {"/foo/../../../ton", L"/foo/../../../ton", "/ton", url_parse::Component(0, 4), true}, - // escaped dots should be unescaped and treated the same as dots - {"/foo/%2e", L"/foo/%2e", "/foo/", url_parse::Component(0, 5), true}, - {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", url_parse::Component(0, 8), true}, - {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar", url_parse::Component(0, 6), true}, - // Multiple slashes in a row should be preserved and treated like empty - // directory names. - {"////../..", L"////../..", "//", url_parse::Component(0, 2), true}, - - // ----- escaping tests ----- - {"/foo", L"/foo", "/foo", url_parse::Component(0, 4), true}, - // Valid escape sequence - {"/%20foo", L"/%20foo", "/%20foo", url_parse::Component(0, 7), true}, - // Invalid escape sequence we should pass through unchanged. - {"/foo%", L"/foo%", "/foo%", url_parse::Component(0, 5), true}, - {"/foo%2", L"/foo%2", "/foo%2", url_parse::Component(0, 6), true}, - // Invalid escape sequence: bad characters should be treated the same as - // the sourrounding text, not as escaped (in this case, UTF-8). - {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", url_parse::Component(0, 10), true}, - {"/foo%2\xc2\xa9zbar", NULL, "/foo%2%C2%A9zbar", url_parse::Component(0, 16), true}, - {NULL, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", url_parse::Component(0, 22), true}, - // Regular characters that are escaped should be unescaped - {"/foo%41%7a", L"/foo%41%7a", "/fooAz", url_parse::Component(0, 6), true}, - // Funny characters that are unescaped should be escaped - {"/foo\x09\x91%91", NULL, "/foo%09%91%91", url_parse::Component(0, 13), true}, - {NULL, L"/foo\x09\x91%91", "/foo%09%C2%91%91", url_parse::Component(0, 16), true}, - // Invalid characters that are escaped should cause a failure. - {"/foo%00%51", L"/foo%00%51", "/foo%00Q", url_parse::Component(0, 8), false}, - // Some characters should be passed through unchanged regardless of esc. - {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", url_parse::Component(0, 13), true}, - // Characters that are properly escaped should not have the case changed - // of hex letters. - {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", url_parse::Component(0, 13), true}, - // Funny characters that are unescaped should be escaped - {"/foo\tbar", L"/foo\tbar", "/foo%09bar", url_parse::Component(0, 10), true}, - // Backslashes should get converted to forward slashes - {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", url_parse::Component(0, 8), true}, - // Hashes found in paths (possibly only when the caller explicitly sets - // the path on an already-parsed URL) should be escaped. - {"/foo#bar", L"/foo#bar", "/foo%23bar", url_parse::Component(0, 10), true}, - // %7f should be allowed and %3D should not be unescaped (these were wrong - // in a previous version). - {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", url_parse::Component(0, 24), true}, - // @ should be passed through unchanged (escaped or unescaped). - {"/@asdf%40", L"/@asdf%40", "/@asdf%40", url_parse::Component(0, 9), true}, - - // ----- encoding tests ----- - // Basic conversions - {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", url_parse::Component(0, 37), true}, - // Invalid unicode characters should fail. We only do validation on - // UTF-16 input, so this doesn't happen on 8-bit. - {"/\xef\xb7\x90zyx", NULL, "/%EF%B7%90zyx", url_parse::Component(0, 13), true}, - {NULL, L"/\xfdd0zyx", "/%EF%BF%BDzyx", url_parse::Component(0, 13), false}, - }; - - for (size_t i = 0; i < arraysize(path_cases); i++) { - if (path_cases[i].input8) { - int len = static_cast(strlen(path_cases[i].input8)); - url_parse::Component in_comp(0, len); - url_parse::Component out_comp; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizePath(path_cases[i].input8, in_comp, - &output, &out_comp); - output.Complete(); - - EXPECT_EQ(path_cases[i].expected_success, success); - EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(path_cases[i].expected, out_str); - } - - if (path_cases[i].input16) { - string16 input16(WStringToUTF16(path_cases[i].input16)); - int len = static_cast(input16.length()); - url_parse::Component in_comp(0, len); - url_parse::Component out_comp; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - - bool success = url_canon::CanonicalizePath(input16.c_str(), in_comp, - &output, &out_comp); - output.Complete(); - - EXPECT_EQ(path_cases[i].expected_success, success); - EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(path_cases[i].expected, out_str); - } - } - - // Manual test: embedded NULLs should be escaped and the URL should be marked - // as invalid. - const char path_with_null[] = "/ab\0c"; - url_parse::Component in_comp(0, 5); - url_parse::Component out_comp; - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizePath(path_with_null, in_comp, - &output, &out_comp); - output.Complete(); - EXPECT_FALSE(success); - EXPECT_EQ("/ab%00c", out_str); -} - -TEST(URLCanonTest, Query) { - struct QueryCase { - const char* input8; - const wchar_t* input16; - const char* encoding; - const char* expected; - } query_cases[] = { - // Regular ASCII case in some different encodings. - {"foo=bar", L"foo=bar", NULL, "?foo=bar"}, - {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, - {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, - {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, - // Allow question marks in the query without escaping - {"as?df", L"as?df", NULL, "?as?df"}, - // Always escape '#' since it would mark the ref. - {"as#df", L"as#df", NULL, "?as%23df"}, - // Escape some questionable 8-bit characters, but never unescape. - {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"}, - {"%40%41123", L"%40%41123", NULL, "?%40%41123"}, - // Chinese input/output - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"}, - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"}, - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, - // Unencodable character in the destination character set should be - // escaped. The escape sequence unescapes to be the entity name: - // "?q=你" - {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"}, - // Invalid UTF-8/16 input should be replaced with invalid characters. - {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"}, - // Don't allow < or > because sometimes they are used for XSS if the - // URL is echoed in content. Firefox does this, IE doesn't. - {"q=", L"q=", NULL, "?q=%3Casdf%3E"}, - // Escape double quotemarks in the query. - {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { - url_parse::Component out_comp; - - UConvScoper conv(query_cases[i].encoding); - ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); - url_canon::ICUCharsetConverter converter(conv.converter()); - - // Map NULL to a NULL converter pointer. - url_canon::ICUCharsetConverter* conv_pointer = &converter; - if (!query_cases[i].encoding) - conv_pointer = NULL; - - if (query_cases[i].input8) { - int len = static_cast(strlen(query_cases[i].input8)); - url_parse::Component in_comp(0, len); - std::string out_str; - - url_canon::StdStringCanonOutput output(&out_str); - url_canon::CanonicalizeQuery(query_cases[i].input8, in_comp, - conv_pointer, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(query_cases[i].expected, out_str); - } - - if (query_cases[i].input16) { - string16 input16(WStringToUTF16(query_cases[i].input16)); - int len = static_cast(input16.length()); - url_parse::Component in_comp(0, len); - std::string out_str; - - url_canon::StdStringCanonOutput output(&out_str); - url_canon::CanonicalizeQuery(input16.c_str(), in_comp, - conv_pointer, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(query_cases[i].expected, out_str); - } - } - - // Extra test for input with embedded NULL; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_parse::Component out_comp; - url_canon::CanonicalizeQuery("a \x00z\x01", url_parse::Component(0, 5), NULL, - &output, &out_comp); - output.Complete(); - EXPECT_EQ("?a%20%00z%01", out_str); -} - -TEST(URLCanonTest, Ref) { - // Refs are trivial, it just checks the encoding. - DualComponentCase ref_cases[] = { - // Regular one, we shouldn't escape spaces, et al. - {"hello, world", L"hello, world", "#hello, world", url_parse::Component(1, 12), true}, - // UTF-8/wide input should be preserved - {"\xc2\xa9", L"\xa9", "#\xc2\xa9", url_parse::Component(1, 2), true}, - // Test a characer that takes > 16 bits (U+10300 = old italic letter A) - {"\xF0\x90\x8C\x80ss", L"\xd800\xdf00ss", "#\xF0\x90\x8C\x80ss", url_parse::Component(1, 6), true}, - // Escaping should be preserved unchanged, even invalid ones - {"%41%a", L"%41%a", "#%41%a", url_parse::Component(1, 5), true}, - // Invalid UTF-8/16 input should be flagged and the input made valid - {"\xc2", NULL, "#\xef\xbf\xbd", url_parse::Component(1, 3), true}, - {NULL, L"\xd800\x597d", "#\xef\xbf\xbd\xe5\xa5\xbd", url_parse::Component(1, 6), true}, - // Test a Unicode invalid character. - {"a\xef\xb7\x90", L"a\xfdd0", "#a\xef\xbf\xbd", url_parse::Component(1, 4), true}, - // Refs can have # signs and we should preserve them. - {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", url_parse::Component(1, 9), true}, - {"#asdf", L"#asdf", "##asdf", url_parse::Component(1, 5), true}, - }; - - for (size_t i = 0; i < arraysize(ref_cases); i++) { - // 8-bit input - if (ref_cases[i].input8) { - int len = static_cast(strlen(ref_cases[i].input8)); - url_parse::Component in_comp(0, len); - url_parse::Component out_comp; - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_canon::CanonicalizeRef(ref_cases[i].input8, in_comp, - &output, &out_comp); - output.Complete(); - - EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(ref_cases[i].expected, out_str); - } - - // 16-bit input - if (ref_cases[i].input16) { - string16 input16(WStringToUTF16(ref_cases[i].input16)); - int len = static_cast(input16.length()); - url_parse::Component in_comp(0, len); - url_parse::Component out_comp; - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_canon::CanonicalizeRef(input16.c_str(), in_comp, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(ref_cases[i].expected, out_str); - } - } - - // Try one with an embedded NULL. It should be stripped. - const char null_input[5] = "ab\x00z"; - url_parse::Component null_input_component(0, 4); - url_parse::Component out_comp; - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_canon::CanonicalizeRef(null_input, null_input_component, - &output, &out_comp); - output.Complete(); - - EXPECT_EQ(1, out_comp.begin); - EXPECT_EQ(3, out_comp.len); - EXPECT_EQ("#abz", out_str); -} - -TEST(URLCanonTest, CanonicalizeStandardURL) { - // The individual component canonicalize tests should have caught the cases - // for each of those components. Here, we just need to test that the various - // parts are included or excluded properly, and have the correct separators. - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - } cases[] = { - {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true}, - {"http://[www.google.com]/", "http://[www.google.com]/", false}, - {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false}, - {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true}, - {"www.google.com", ":www.google.com/", true}, - {"http://192.0x00A80001", "http://192.168.0.1/", true}, - {"http://www/foo%2Ehtml", "http://www/foo.html", true}, - {"http://user:pass@/", "http://user:pass@/", false}, - {"http://%25DOMAIN:foobar@foodomain.com/", "http://%25DOMAIN:foobar@foodomain.com/", true}, - - // Backslashes should get converted to forward slashes. - {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true}, - - // Busted refs shouldn't make the whole thing fail. - {"http://www.google.com/asdf#\xc2", "http://www.google.com/asdf#\xef\xbf\xbd", true}, - - // Basic port tests. - {"http://foo:80/", "http://foo/", true}, - {"http://foo:81/", "http://foo:81/", true}, - {"httpa://foo:80/", "httpa://foo:80/", true}, - {"http://foo:-80/", "http://foo:-80/", false}, - - {"https://foo:443/", "https://foo/", true}, - {"https://foo:80/", "https://foo:80/", true}, - {"ftp://foo:21/", "ftp://foo/", true}, - {"ftp://foo:80/", "ftp://foo:80/", true}, - {"gopher://foo:70/", "gopher://foo/", true}, - {"gopher://foo:443/", "gopher://foo:443/", true}, - {"ws://foo:80/", "ws://foo/", true}, - {"ws://foo:81/", "ws://foo:81/", true}, - {"ws://foo:443/", "ws://foo:443/", true}, - {"ws://foo:815/", "ws://foo:815/", true}, - {"wss://foo:80/", "wss://foo:80/", true}, - {"wss://foo:81/", "wss://foo:81/", true}, - {"wss://foo:443/", "wss://foo/", true}, - {"wss://foo:815/", "wss://foo:815/", true}, - }; - - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - url_parse::Parsed parsed; - url_parse::ParseStandardURL(cases[i].input, url_len, &parsed); - - url_parse::Parsed out_parsed; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizeStandardURL( - cases[i].input, url_len, parsed, NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - } -} - -// The codepath here is the same as for regular canonicalization, so we just -// need to test that things are replaced or not correctly. -TEST(URLCanonTest, ReplaceStandardURL) { - ReplaceCase replace_cases[] = { - // Common case of truncating the path. - {"http://www.google.com/foo?bar=baz#ref", NULL, NULL, NULL, NULL, NULL, "/", kDeleteComp, kDeleteComp, "http://www.google.com/"}, - // Replace everything - {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"}, - // Replace nothing - {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"}, - // Replace scheme with filesystem. The result is garbage, but you asked - // for it. - {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - url_parse::Parsed parsed; - url_parse::ParseStandardURL(cur.base, base_len, &parsed); - - url_canon::Replacements r; - typedef url_canon::Replacements R; // Clean up syntax. - - // Note that for the scheme we pass in a different clear function since - // there is no function to clear the scheme. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_parse::Parsed out_parsed; - url_canon::ReplaceStandardURL(replace_cases[i].base, parsed, - r, NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } - - // The path pointer should be ignored if the address is invalid. - { - const char src[] = "http://www.google.com/here_is_the_path"; - int src_len = static_cast(strlen(src)); - - url_parse::Parsed parsed; - url_parse::ParseStandardURL(src, src_len, &parsed); - - // Replace the path to 0 length string. By using 1 as the string address, - // the test should get an access violation if it tries to dereference it. - url_canon::Replacements r; - r.SetPath(reinterpret_cast(0x00000001), url_parse::Component(0, 0)); - std::string out_str1; - url_canon::StdStringCanonOutput output1(&out_str1); - url_parse::Parsed new_parsed; - url_canon::ReplaceStandardURL(src, parsed, r, NULL, &output1, &new_parsed); - output1.Complete(); - EXPECT_STREQ("http://www.google.com/", out_str1.c_str()); - - // Same with an "invalid" path. - r.SetPath(reinterpret_cast(0x00000001), url_parse::Component()); - std::string out_str2; - url_canon::StdStringCanonOutput output2(&out_str2); - url_canon::ReplaceStandardURL(src, parsed, r, NULL, &output2, &new_parsed); - output2.Complete(); - EXPECT_STREQ("http://www.google.com/", out_str2.c_str()); - } -} - -TEST(URLCanonTest, ReplaceFileURL) { - ReplaceCase replace_cases[] = { - // Replace everything - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, - // Replace nothing - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, - // Clear non-path components (common) - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"}, - // Replace path with something that doesn't begin with a slash and make - // sure it gets added properly. - {"file:///C:/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///home/gaba?query#ref"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///home/gaba"}, - {"file:///home/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"}, - // Replace scheme -- shouldn't do anything. - {"file:///C:/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - url_parse::Parsed parsed; - url_parse::ParseFileURL(cur.base, base_len, &parsed); - - url_canon::Replacements r; - typedef url_canon::Replacements R; // Clean up syntax. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_parse::Parsed out_parsed; - url_canon::ReplaceFileURL(cur.base, parsed, - r, NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, ReplaceFileSystemURL) { - ReplaceCase replace_cases[] = { - // Replace everything in the outer URL. - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"}, - // Replace nothing - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"}, - // Clear non-path components (common) - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "filesystem:file:///temporary/gaba"}, - // Replace path with something that doesn't begin with a slash and make - // sure it gets added properly. - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "filesystem:file:///temporary/interesting/?query#ref"}, - // Replace scheme -- shouldn't do anything. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, - // Replace username -- shouldn't do anything. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, - // Replace password -- shouldn't do anything. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2", NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, - // Replace host -- shouldn't do anything. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, NULL, "foo.com", NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, - // Replace port -- shouldn't do anything. - {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL, NULL, "41", NULL, NULL, NULL, "filesystem:http://u:p@bar.com:40/t/gaba?query#ref"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - url_parse::Parsed parsed; - url_parse::ParseFileSystemURL(cur.base, base_len, &parsed); - - url_canon::Replacements r; - typedef url_canon::Replacements R; // Clean up syntax. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_parse::Parsed out_parsed; - url_canon::ReplaceFileSystemURL(cur.base, parsed, r, NULL, - &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, ReplacePathURL) { - ReplaceCase replace_cases[] = { - // Replace everything - {"data:foo", "javascript", NULL, NULL, NULL, NULL, "alert('foo?');", NULL, NULL, "javascript:alert('foo?');"}, - // Replace nothing - {"data:foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "data:foo"}, - // Replace one or the other - {"data:foo", "javascript", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "javascript:foo"}, - {"data:foo", NULL, NULL, NULL, NULL, NULL, "bar", NULL, NULL, "data:bar"}, - {"data:foo", NULL, NULL, NULL, NULL, NULL, kDeleteComp, NULL, NULL, "data:"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - url_parse::Parsed parsed; - url_parse::ParsePathURL(cur.base, base_len, &parsed); - - url_canon::Replacements r; - typedef url_canon::Replacements R; // Clean up syntax. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_parse::Parsed out_parsed; - url_canon::ReplacePathURL(cur.base, parsed, - r, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, ReplaceMailtoURL) { - ReplaceCase replace_cases[] = { - // Replace everything - {"mailto:jon@foo.com?body=sup", "mailto", NULL, NULL, NULL, NULL, "addr1", "to=tony", NULL, "mailto:addr1?to=tony"}, - // Replace nothing - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mailto:jon@foo.com?body=sup"}, - // Replace the path - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", NULL, NULL, "mailto:jason?body=sup"}, - // Replace the query - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "custom=1", NULL, "mailto:jon@foo.com?custom=1"}, - // Replace the path and query - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", "custom=1", NULL, "mailto:jason?custom=1"}, - // Set the query to empty (should leave trailing question mark) - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "", NULL, "mailto:jon@foo.com?"}, - // Clear the query - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "|", NULL, "mailto:jon@foo.com"}, - // Clear the path - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "|", NULL, NULL, "mailto:?body=sup"}, - // Clear the path + query - {"mailto:", NULL, NULL, NULL, NULL, NULL, "|", "|", NULL, "mailto:"}, - // Setting the ref should have no effect - {"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - url_parse::Parsed parsed; - url_parse::ParseMailtoURL(cur.base, base_len, &parsed); - - url_canon::Replacements r; - typedef url_canon::Replacements R; - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - url_parse::Parsed out_parsed; - url_canon::ReplaceMailtoURL(cur.base, parsed, - r, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, CanonicalizeFileURL) { - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - url_parse::Component expected_host; - url_parse::Component expected_path; - } cases[] = { -#ifdef _WIN32 - // Windows-style paths - {"file:c:\\foo\\bar.html", "file:///C:/foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 16)}, - {" File:c|////foo\\bar.html", "file:///C:////foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 19)}, - {"file:", "file:///", true, url_parse::Component(), url_parse::Component(7, 1)}, - {"file:UNChost/path", "file://unchost/path", true, url_parse::Component(7, 7), url_parse::Component(14, 5)}, - // CanonicalizeFileURL supports absolute Windows style paths for IE - // compatability. Note that the caller must decide that this is a file - // URL itself so it can call the file canonicalizer. This is usually - // done automatically as part of relative URL resolving. - {"c:\\foo\\bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)}, - {"C|/foo/bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)}, - {"/C|\\foo\\bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)}, - {"//C|/foo/bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)}, - {"//server/file", "file://server/file", true, url_parse::Component(7, 6), url_parse::Component(13, 5)}, - {"\\\\server\\file", "file://server/file", true, url_parse::Component(7, 6), url_parse::Component(13, 5)}, - {"/\\server/file", "file://server/file", true, url_parse::Component(7, 6), url_parse::Component(13, 5)}, - // We should preserve the number of slashes after the colon for IE - // compatability, except when there is none, in which case we should - // add one. - {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 16)}, - {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 19)}, - // Three slashes should be non-UNC, even if there is no drive spec (IE - // does this, which makes the resulting request invalid). - {"file:///foo/bar.txt", "file:///foo/bar.txt", true, url_parse::Component(), url_parse::Component(7, 12)}, - // TODO(brettw) we should probably fail for invalid host names, which - // would change the expected result on this test. We also currently allow - // colon even though it's probably invalid, because its currently the - // "natural" result of the way the canonicalizer is written. There doesn't - // seem to be a strong argument for why allowing it here would be bad, so - // we just tolerate it and the load will fail later. - {"FILE:/\\/\\7:\\\\//foo\\bar.html", "file://7:////foo/bar.html", false, url_parse::Component(7, 2), url_parse::Component(9, 16)}, - {"file:filer/home\\me", "file://filer/home/me", true, url_parse::Component(7, 5), url_parse::Component(12, 8)}, - // Make sure relative paths can't go above the "C:" - {"file:///C:/foo/../../../bar.html", "file:///C:/bar.html", true, url_parse::Component(), url_parse::Component(7, 12)}, - // Busted refs shouldn't make the whole thing fail. - {"file:///C:/asdf#\xc2", "file:///C:/asdf#\xef\xbf\xbd", true, url_parse::Component(), url_parse::Component(7, 8)}, -#else - // Unix-style paths - {"file:///home/me", "file:///home/me", true, url_parse::Component(), url_parse::Component(7, 8)}, - // Windowsy ones should get still treated as Unix-style. - {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 16)}, - {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 19)}, - // file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html) - {"//", "file:///", true, url_parse::Component(), url_parse::Component(7, 1)}, - {"///", "file:///", true, url_parse::Component(), url_parse::Component(7, 1)}, - {"///test", "file:///test", true, url_parse::Component(), url_parse::Component(7, 5)}, - {"file://test", "file://test/", true, url_parse::Component(7, 4), url_parse::Component(11, 1)}, - {"file://localhost", "file://localhost/", true, url_parse::Component(7, 9), url_parse::Component(16, 1)}, - {"file://localhost/", "file://localhost/", true, url_parse::Component(7, 9), url_parse::Component(16, 1)}, - {"file://localhost/test", "file://localhost/test", true, url_parse::Component(7, 9), url_parse::Component(16, 5)}, -#endif // _WIN32 - }; - - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - url_parse::Parsed parsed; - url_parse::ParseFileURL(cases[i].input, url_len, &parsed); - - url_parse::Parsed out_parsed; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizeFileURL(cases[i].input, url_len, - parsed, NULL, &output, - &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - - // Make sure the spec was properly identified, the file canonicalizer has - // different code for writing the spec. - EXPECT_EQ(0, out_parsed.scheme.begin); - EXPECT_EQ(4, out_parsed.scheme.len); - - EXPECT_EQ(cases[i].expected_host.begin, out_parsed.host.begin); - EXPECT_EQ(cases[i].expected_host.len, out_parsed.host.len); - - EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin); - EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len); - } -} - -TEST(URLCanonTest, CanonicalizeFileSystemURL) { - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - } cases[] = { - {"Filesystem:htTp://www.Foo.com:80/tempoRary", "filesystem:http://www.foo.com/tempoRary/", true}, - {"filesystem:httpS://www.foo.com/temporary/", "filesystem:https://www.foo.com/temporary/", true}, - {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//", false}, - {"filesystem:http://www.foo.com/persistent/bob?query#ref", "filesystem:http://www.foo.com/persistent/bob?query#ref", true}, - {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true}, - {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true}, - {"filesystem:File:///temporary/Bob?qUery#reF", "filesystem:file:///temporary/Bob?qUery#reF", true}, - }; - - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - url_parse::Parsed parsed; - url_parse::ParseFileSystemURL(cases[i].input, url_len, &parsed); - - url_parse::Parsed out_parsed; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizeFileSystemURL(cases[i].input, url_len, - parsed, NULL, &output, - &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - - // Make sure the spec was properly identified, the filesystem canonicalizer - // has different code for writing the spec. - EXPECT_EQ(0, out_parsed.scheme.begin); - EXPECT_EQ(10, out_parsed.scheme.len); - if (success) - EXPECT_GT(out_parsed.path.len, 0); - } -} - -TEST(URLCanonTest, CanonicalizePathURL) { - // Path URLs should get canonicalized schemes but nothing else. - struct PathCase { - const char* input; - const char* expected; - } path_cases[] = { - {"javascript:", "javascript:"}, - {"JavaScript:Foo", "javascript:Foo"}, - {":\":This /is interesting;?#", ":\":This /is interesting;?#"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(path_cases); i++) { - int url_len = static_cast(strlen(path_cases[i].input)); - url_parse::Parsed parsed; - url_parse::ParsePathURL(path_cases[i].input, url_len, &parsed); - - url_parse::Parsed out_parsed; - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizePathURL(path_cases[i].input, url_len, - parsed, &output, - &out_parsed); - output.Complete(); - - EXPECT_TRUE(success); - EXPECT_EQ(path_cases[i].expected, out_str); - - EXPECT_EQ(0, out_parsed.host.begin); - EXPECT_EQ(-1, out_parsed.host.len); - - // When we end with a colon at the end, there should be no path. - if (path_cases[i].input[url_len - 1] == ':') { - EXPECT_EQ(0, out_parsed.path.begin); - EXPECT_EQ(-1, out_parsed.path.len); - } - } -} - -TEST(URLCanonTest, CanonicalizeMailtoURL) { - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - url_parse::Component expected_path; - url_parse::Component expected_query; - } cases[] = { - {"mailto:addr1", "mailto:addr1", true, url_parse::Component(7, 5), url_parse::Component()}, - {"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, url_parse::Component(7, 13), url_parse::Component()}, - // Trailing whitespace is stripped. - {"MaIlTo:addr1 \t ", "mailto:addr1", true, url_parse::Component(7, 5), url_parse::Component()}, - {"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, url_parse::Component(7, 5), url_parse::Component(13,6)}, - {"mailto:addr1,addr2", "mailto:addr1,addr2", true, url_parse::Component(7, 11), url_parse::Component()}, - {"mailto:addr1, addr2", "mailto:addr1, addr2", true, url_parse::Component(7, 12), url_parse::Component()}, - {"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, url_parse::Component(7, 13), url_parse::Component()}, - {"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, url_parse::Component(7, 12), url_parse::Component()}, - // Null character should be escaped to %00 - {"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, url_parse::Component(7, 13), url_parse::Component(21, 3)}, - // Invalid -- UTF-8 encoded surrogate value. - {"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, url_parse::Component(7, 9), url_parse::Component()}, - {"mailto:addr1?", "mailto:addr1?", true, url_parse::Component(7, 5), url_parse::Component(13, 0)}, - }; - - // Define outside of loop to catch bugs where components aren't reset - url_parse::Parsed parsed; - url_parse::Parsed out_parsed; - - for (size_t i = 0; i < ARRAYSIZE(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - if (i == 8) { - // The 9th test case purposely has a '\0' in it -- don't count it - // as the string terminator. - url_len = 22; - } - url_parse::ParseMailtoURL(cases[i].input, url_len, &parsed); - - std::string out_str; - url_canon::StdStringCanonOutput output(&out_str); - bool success = url_canon::CanonicalizeMailtoURL(cases[i].input, url_len, - parsed, &output, - &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - - // Make sure the spec was properly identified - EXPECT_EQ(0, out_parsed.scheme.begin); - EXPECT_EQ(6, out_parsed.scheme.len); - - EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin); - EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len); - - EXPECT_EQ(cases[i].expected_query.begin, out_parsed.query.begin); - EXPECT_EQ(cases[i].expected_query.len, out_parsed.query.len); - } -} - -#ifndef WIN32 - -TEST(URLCanonTest, _itoa_s) { - // We fill the buffer with 0xff to ensure that it's getting properly - // null-terminated. We also allocate one byte more than what we tell - // _itoa_s about, and ensure that the extra byte is untouched. - char buf[6]; - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itoa_s(12, buf, sizeof(buf) - 1, 10)); - EXPECT_STREQ("12", buf); - EXPECT_EQ('\xFF', buf[3]); - - // Test the edge cases - exactly the buffer size and one over - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itoa_s(1234, buf, sizeof(buf) - 1, 10)); - EXPECT_STREQ("1234", buf); - EXPECT_EQ('\xFF', buf[5]); - - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(EINVAL, url_canon::_itoa_s(12345, buf, sizeof(buf) - 1, 10)); - EXPECT_EQ('\xFF', buf[5]); // should never write to this location - - // Test the template overload (note that this will see the full buffer) - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itoa_s(12, buf, 10)); - EXPECT_STREQ("12", buf); - EXPECT_EQ('\xFF', buf[3]); - - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itoa_s(12345, buf, 10)); - EXPECT_STREQ("12345", buf); - - EXPECT_EQ(EINVAL, url_canon::_itoa_s(123456, buf, 10)); - - // Test that radix 16 is supported. - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itoa_s(1234, buf, sizeof(buf) - 1, 16)); - EXPECT_STREQ("4d2", buf); - EXPECT_EQ('\xFF', buf[5]); -} - -TEST(URLCanonTest, _itow_s) { - // We fill the buffer with 0xff to ensure that it's getting properly - // null-terminated. We also allocate one byte more than what we tell - // _itoa_s about, and ensure that the extra byte is untouched. - char16 buf[6]; - const char fill_mem = 0xff; - const char16 fill_char = 0xffff; - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itow_s(12, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(WStringToUTF16(L"12"), string16(buf)); - EXPECT_EQ(fill_char, buf[3]); - - // Test the edge cases - exactly the buffer size and one over - EXPECT_EQ(0, url_canon::_itow_s(1234, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(WStringToUTF16(L"1234"), string16(buf)); - EXPECT_EQ(fill_char, buf[5]); - - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(EINVAL, url_canon::_itow_s(12345, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(fill_char, buf[5]); // should never write to this location - - // Test the template overload (note that this will see the full buffer) - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itow_s(12, buf, 10)); - EXPECT_EQ(WStringToUTF16(L"12"), string16(buf)); - EXPECT_EQ(fill_char, buf[3]); - - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(0, url_canon::_itow_s(12345, buf, 10)); - EXPECT_EQ(WStringToUTF16(L"12345"), string16(buf)); - - EXPECT_EQ(EINVAL, url_canon::_itow_s(123456, buf, 10)); -} - -#endif // !WIN32 - -// Returns true if the given two structures are the same. -static bool ParsedIsEqual(const url_parse::Parsed& a, - const url_parse::Parsed& b) { - return a.scheme.begin == b.scheme.begin && a.scheme.len == b.scheme.len && - a.username.begin == b.username.begin && a.username.len == b.username.len && - a.password.begin == b.password.begin && a.password.len == b.password.len && - a.host.begin == b.host.begin && a.host.len == b.host.len && - a.port.begin == b.port.begin && a.port.len == b.port.len && - a.path.begin == b.path.begin && a.path.len == b.path.len && - a.query.begin == b.query.begin && a.query.len == b.query.len && - a.ref.begin == b.ref.begin && a.ref.len == b.ref.len; -} - -TEST(URLCanonTest, ResolveRelativeURL) { - struct RelativeCase { - const char* base; // Input base URL: MUST BE CANONICAL - bool is_base_hier; // Is the base URL hierarchical - bool is_base_file; // Tells us if the base is a file URL. - const char* test; // Input URL to test against. - bool succeed_relative; // Whether we expect IsRelativeURL to succeed - bool is_rel; // Whether we expect |test| to be relative or not. - bool succeed_resolve; // Whether we expect ResolveRelativeURL to succeed. - const char* resolved; // What we expect in the result when resolving. - } rel_cases[] = { - // Basic absolute input. - {"http://host/a", true, false, "http://another/", true, false, false, NULL}, - {"http://host/a", true, false, "http:////another/", true, false, false, NULL}, - // Empty relative URLs should only remove the ref part of the URL, - // leaving the rest unchanged. - {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"}, - {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"}, - {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"}, - // Spaces at the ends of the relative path should be ignored. - {"http://foo/bar", true, false, " another ", true, true, true, "http://foo/another"}, - {"http://foo/bar", true, false, " . ", true, true, true, "http://foo/"}, - {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"}, - // Matching schemes without two slashes are treated as relative. - {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"}, - {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"}, - {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"}, - {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"}, - // Nonmatching schemes are absolute. - {"http://host/a", true, false, "https:host2", true, false, false, NULL}, - {"http://host/a", true, false, "htto:/host2", true, false, false, NULL}, - // Absolute path input - {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"}, - {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"}, - {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"}, - {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"}, - {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"}, - {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"}, - // Relative path input - {"http://host/a", true, false, "b", true, true, true, "http://host/b"}, - {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"}, - {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"}, - {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"}, - {"http://host/a/", true, false, "..", true, true, true, "http://host/"}, - {"http://host/a/", true, false, "./..", true, true, true, "http://host/"}, - {"http://host/a/", true, false, "../.", true, true, true, "http://host/"}, - {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"}, - {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"}, - // Query input - {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"}, - {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"}, - {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"}, - // Ref input - {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"}, - {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"}, - {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"}, - // Non-hierarchical base: no relative handling. Relative input should - // error, and if a scheme is present, it should be treated as absolute. - {"data:foobar", false, false, "baz.html", false, false, false, NULL}, - {"data:foobar", false, false, "data:baz", true, false, false, NULL}, - {"data:foobar", false, false, "data:/base", true, false, false, NULL}, - // Non-hierarchical base: absolute input should succeed. - {"data:foobar", false, false, "http://host/", true, false, false, NULL}, - {"data:foobar", false, false, "http:host", true, false, false, NULL}, - // Invalid schemes should be treated as relative. - {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"}, - {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"}, - {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"}, - {"data:asdf", false, false, ":foo", false, false, false, NULL}, - // We should treat semicolons like any other character in URL resolving - {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"}, - {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"}, - {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"}, - // Relative URLs can also be written as "//foo/bar" which is relative to - // the scheme. In this case, it would take the old scheme, so for http - // the example would resolve to "http://foo/bar". - {"http://host/a", true, false, "//another", true, true, true, "http://another/"}, - {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"}, - {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"}, - {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"}, - {"http://host/a", true, false, "//", true, true, false, "http:"}, - // IE will also allow one or the other to be a backslash to get the same - // behavior. - {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"}, - {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"}, -#ifdef WIN32 - // Resolving against Windows file base URLs. - {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL}, - {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"}, - {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"}, - {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"}, - // But two backslashes on Windows should be UNC so should be treated - // as absolute. - {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL}, - // IE doesn't support drive specs starting with two slashes. It fails - // immediately and doesn't even try to load. We fix it up to either - // an absolute path or UNC depending on what it looks like. - {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"}, - {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"}, - // Windows drive specs should be allowed and treated as absolute. - {"file:///C:/foo", true, true, "c:", true, false, false, NULL}, - {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL}, - {"http://host/a", true, false, "c:\\foo", true, false, false, NULL}, - // Relative paths with drive letters should be allowed when the base is - // also a file. - {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"}, - // Treat absolute paths as being off of the drive. - {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"}, - {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"}, - {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"}, - // On Windows, two slashes without a drive letter when the base is a file - // means that the path is UNC. - {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"}, - {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"}, -#else - // On Unix we fall back to relative behavior since there's nothing else - // reasonable to do. - {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"}, -#endif - // Even on Windows, we don't allow relative drive specs when the base - // is not file. - {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"}, - {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"}, - // Filesystem URL tests; filesystem URLs are only valid and relative if - // they have no scheme, e.g. "./index.html". There's no valid equivalent - // to http:index.html. - {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL}, - {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL}, - {"filesystem:http://host/t/path", true, false, "http://host/t/path2", true, false, false, NULL}, - {"http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL}, - {"filesystem:http://host/t/path", true, false, "./path2", true, true, true, "filesystem:http://host/t/path2"}, - {"filesystem:http://host/t/path/", true, false, "path2", true, true, true, "filesystem:http://host/t/path/path2"}, - {"filesystem:http://host/t/path", true, false, "filesystem:http:path2", true, false, false, NULL}, - // Absolute URLs are still not relative to a non-standard base URL. - {"about:blank", false, false, "http://X/A", true, false, true, ""}, - {"about:blank", false, false, "content://content.Provider/", true, false, true, ""}, - }; - - for (size_t i = 0; i < ARRAYSIZE(rel_cases); i++) { - const RelativeCase& cur_case = rel_cases[i]; - - url_parse::Parsed parsed; - int base_len = static_cast(strlen(cur_case.base)); - if (cur_case.is_base_file) - url_parse::ParseFileURL(cur_case.base, base_len, &parsed); - else if (cur_case.is_base_hier) - url_parse::ParseStandardURL(cur_case.base, base_len, &parsed); - else - url_parse::ParsePathURL(cur_case.base, base_len, &parsed); - - // First see if it is relative. - int test_len = static_cast(strlen(cur_case.test)); - bool is_relative; - url_parse::Component relative_component; - bool succeed_is_rel = url_canon::IsRelativeURL( - cur_case.base, parsed, cur_case.test, test_len, cur_case.is_base_hier, - &is_relative, &relative_component); - - EXPECT_EQ(cur_case.succeed_relative, succeed_is_rel) << - "succeed is rel failure on " << cur_case.test; - EXPECT_EQ(cur_case.is_rel, is_relative) << - "is rel failure on " << cur_case.test; - // Now resolve it. - if (succeed_is_rel && is_relative && cur_case.is_rel) { - std::string resolved; - url_canon::StdStringCanonOutput output(&resolved); - url_parse::Parsed resolved_parsed; - - bool succeed_resolve = url_canon::ResolveRelativeURL( - cur_case.base, parsed, cur_case.is_base_file, - cur_case.test, relative_component, NULL, &output, &resolved_parsed); - output.Complete(); - - EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve); - EXPECT_EQ(cur_case.resolved, resolved) << " on " << cur_case.test; - - // Verify that the output parsed structure is the same as parsing a - // the URL freshly. - url_parse::Parsed ref_parsed; - int resolved_len = static_cast(resolved.size()); - if (cur_case.is_base_file) - url_parse::ParseFileURL(resolved.c_str(), resolved_len, &ref_parsed); - else if (cur_case.is_base_hier) - url_parse::ParseStandardURL(resolved.c_str(), resolved_len, &ref_parsed); - else - url_parse::ParsePathURL(resolved.c_str(), resolved_len, &ref_parsed); - EXPECT_TRUE(ParsedIsEqual(ref_parsed, resolved_parsed)); - } - } -} - -// It used to be when we did a replacement with a long buffer of UTF-16 -// characters, we would get invalid data in the URL. This is because the buffer -// it used to hold the UTF-8 data was resized, while some pointers were still -// kept to the old buffer that was removed. -TEST(URLCanonTest, ReplacementOverflow) { - const char src[] = "file:///C:/foo/bar"; - int src_len = static_cast(strlen(src)); - url_parse::Parsed parsed; - url_parse::ParseFileURL(src, src_len, &parsed); - - // Override two components, the path with something short, and the query with - // sonething long enough to trigger the bug. - url_canon::Replacements repl; - string16 new_query; - for (int i = 0; i < 4800; i++) - new_query.push_back('a'); - - string16 new_path(WStringToUTF16(L"/foo")); - repl.SetPath(new_path.c_str(), url_parse::Component(0, 4)); - repl.SetQuery(new_query.c_str(), - url_parse::Component(0, static_cast(new_query.length()))); - - // Call ReplaceComponents on the string. It doesn't matter if we call it for - // standard URLs, file URLs, etc, since they will go to the same replacement - // function that was buggy. - url_parse::Parsed repl_parsed; - std::string repl_str; - url_canon::StdStringCanonOutput repl_output(&repl_str); - url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed); - repl_output.Complete(); - - // Generate the expected string and check. - std::string expected("file:///foo?"); - for (size_t i = 0; i < new_query.length(); i++) - expected.push_back('a'); - EXPECT_TRUE(expected == repl_str); -} diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_common.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_common.h.svn-base deleted file mode 100644 index ac045a8ce..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_common.h.svn-base +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2010, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLEURL_SRC_URL_COMMON_H__ -#define GOOGLEURL_SRC_URL_COMMON_H__ - -#if !defined(GURL_IMPLEMENTATION) -#define GURL_IMPLEMENTATION 0 -#endif - -#if defined(GURL_DLL) -#if defined(WIN32) -#if GURL_IMPLEMENTATION -#define GURL_API __declspec(dllexport) -#else -#define GURL_API __declspec(dllimport) -#endif -#else -// Non-Windows DLLs. -#define GURL_API __attribute__((visibility("default"))) -#endif -#else -// Not a DLL. -#define GURL_API -#endif - -#endif // GOOGLEURL_SRC_URL_COMMON_H__ - diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_file.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_file.h.svn-base deleted file mode 100644 index c1b8ac9c5..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_file.h.svn-base +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Provides shared functions used by the internals of the parser and -// canonicalizer for file URLs. Do not use outside of these modules. - -#ifndef GOOGLEURL_SRC_URL_FILE_H__ -#define GOOGLEURL_SRC_URL_FILE_H__ - -#include "googleurl/src/url_parse_internal.h" - -namespace url_parse { - -#ifdef WIN32 - -// We allow both "c:" and "c|" as drive identifiers. -inline bool IsWindowsDriveSeparator(char16 ch) { - return ch == ':' || ch == '|'; -} -inline bool IsWindowsDriveLetter(char16 ch) { - return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); -} - -#endif // WIN32 - -// Returns the index of the next slash in the input after the given index, or -// spec_len if the end of the input is reached. -template -inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) { - int idx = begin_index; - while (idx < spec_len && !IsURLSlash(spec[idx])) - idx++; - return idx; -} - -#ifdef WIN32 - -// Returns true if the start_offset in the given spec looks like it begins a -// drive spec, for example "c:". This function explicitly handles start_offset -// values that are equal to or larger than the spec_len to simplify callers. -// -// If this returns true, the spec is guaranteed to have a valid drive letter -// plus a colon starting at |start_offset|. -template -inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset, - int spec_len) { - int remaining_len = spec_len - start_offset; - if (remaining_len < 2) - return false; // Not enough room. - if (!IsWindowsDriveLetter(spec[start_offset])) - return false; // Doesn't start with a valid drive letter. - if (!IsWindowsDriveSeparator(spec[start_offset + 1])) - return false; // Isn't followed with a drive separator. - return true; -} - -// Returns true if the start_offset in the given text looks like it begins a -// UNC path, for example "\\". This function explicitly handles start_offset -// values that are equal to or larger than the spec_len to simplify callers. -// -// When strict_slashes is set, this function will only accept backslashes as is -// standard for Windows. Otherwise, it will accept forward slashes as well -// which we use for a lot of URL handling. -template -inline bool DoesBeginUNCPath(const CHAR* text, - int start_offset, - int len, - bool strict_slashes) { - int remaining_len = len - start_offset; - if (remaining_len < 2) - return false; - - if (strict_slashes) - return text[start_offset] == '\\' && text[start_offset + 1] == '\\'; - return IsURLSlash(text[start_offset]) && IsURLSlash(text[start_offset + 1]); -} - -#endif // WIN32 - -} // namespace url_parse - -#endif // GOOGLEURL_SRC_URL_FILE_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.cc.svn-base deleted file mode 100644 index b06f4bbe4..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.cc.svn-base +++ /dev/null @@ -1,923 +0,0 @@ -/* Based on nsURLParsers.cc from Mozilla - * ------------------------------------- - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Darin Fisher (original author) - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "googleurl/src/url_parse.h" - -#include - -#include "base/logging.h" -#include "googleurl/src/url_parse_internal.h" -#include "googleurl/src/url_util.h" -#include "googleurl/src/url_util_internal.h" - -namespace url_parse { - -namespace { - -// Returns true if the given character is a valid digit to use in a port. -inline bool IsPortDigit(char16 ch) { - return ch >= '0' && ch <= '9'; -} - -// Returns the offset of the next authority terminator in the input starting -// from start_offset. If no terminator is found, the return value will be equal -// to spec_len. -template -int FindNextAuthorityTerminator(const CHAR* spec, - int start_offset, - int spec_len) { - for (int i = start_offset; i < spec_len; i++) { - if (IsAuthorityTerminator(spec[i])) - return i; - } - return spec_len; // Not found. -} - -template -void ParseUserInfo(const CHAR* spec, - const Component& user, - Component* username, - Component* password) { - // Find the first colon in the user section, which separates the username and - // password. - int colon_offset = 0; - while (colon_offset < user.len && spec[user.begin + colon_offset] != ':') - colon_offset++; - - if (colon_offset < user.len) { - // Found separator: : - *username = Component(user.begin, colon_offset); - *password = MakeRange(user.begin + colon_offset + 1, - user.begin + user.len); - } else { - // No separator, treat everything as the username - *username = user; - *password = Component(); - } -} - -template -void ParseServerInfo(const CHAR* spec, - const Component& serverinfo, - Component* hostname, - Component* port_num) { - if (serverinfo.len == 0) { - // No server info, host name is empty. - hostname->reset(); - port_num->reset(); - return; - } - - // If the host starts with a left-bracket, assume the entire host is an - // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal. - // This assumption will be overridden if we find a right-bracket. - // - // Our IPv6 address canonicalization code requires both brackets to exist, - // but the ability to locate an incomplete address can still be useful. - int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1; - int colon = -1; - - // Find the last right-bracket, and the last colon. - for (int i = serverinfo.begin; i < serverinfo.end(); i++) { - switch (spec[i]) { - case ']': - ipv6_terminator = i; - break; - case ':': - colon = i; - break; - } - } - - if (colon > ipv6_terminator) { - // Found a port number: : - *hostname = MakeRange(serverinfo.begin, colon); - if (hostname->len == 0) - hostname->reset(); - *port_num = MakeRange(colon + 1, serverinfo.end()); - } else { - // No port: - *hostname = serverinfo; - port_num->reset(); - } -} - -// Given an already-identified auth section, breaks it into its consituent -// parts. The port number will be parsed and the resulting integer will be -// filled into the given *port variable, or -1 if there is no port number or it -// is invalid. -template -void DoParseAuthority(const CHAR* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num) { - DCHECK(auth.is_valid()) << "We should always get an authority"; - if (auth.len == 0) { - username->reset(); - password->reset(); - hostname->reset(); - port_num->reset(); - return; - } - - // Search backwards for @, which is the separator between the user info and - // the server info. - int i = auth.begin + auth.len - 1; - while (i > auth.begin && spec[i] != '@') - i--; - - if (spec[i] == '@') { - // Found user info: @ - ParseUserInfo(spec, Component(auth.begin, i - auth.begin), - username, password); - ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), - hostname, port_num); - } else { - // No user info, everything is server info. - username->reset(); - password->reset(); - ParseServerInfo(spec, auth, hostname, port_num); - } -} - -template -void ParsePath(const CHAR* spec, - const Component& path, - Component* filepath, - Component* query, - Component* ref) { - // path = [/]//<...>/;?# - - // Special case when there is no path. - if (path.len == -1) { - filepath->reset(); - query->reset(); - ref->reset(); - return; - } - DCHECK(path.len > 0) << "We should never have 0 length paths"; - - // Search for first occurrence of either ? or #. - int path_end = path.begin + path.len; - - int query_separator = -1; // Index of the '?' - int ref_separator = -1; // Index of the '#' - for (int i = path.begin; i < path_end; i++) { - switch (spec[i]) { - case '?': - // Only match the query string if it precedes the reference fragment - // and when we haven't found one already. - if (ref_separator < 0 && query_separator < 0) - query_separator = i; - break; - case '#': - // Record the first # sign only. - if (ref_separator < 0) - ref_separator = i; - break; - } - } - - // Markers pointing to the character after each of these corresponding - // components. The code below words from the end back to the beginning, - // and will update these indices as it finds components that exist. - int file_end, query_end; - - // Ref fragment: from the # to the end of the path. - if (ref_separator >= 0) { - file_end = query_end = ref_separator; - *ref = MakeRange(ref_separator + 1, path_end); - } else { - file_end = query_end = path_end; - ref->reset(); - } - - // Query fragment: everything from the ? to the next boundary (either the end - // of the path or the ref fragment). - if (query_separator >= 0) { - file_end = query_separator; - *query = MakeRange(query_separator + 1, query_end); - } else { - query->reset(); - } - - // File path: treat an empty file path as no file path. - if (file_end != path.begin) - *filepath = MakeRange(path.begin, file_end); - else - filepath->reset(); -} - -template -bool DoExtractScheme(const CHAR* url, - int url_len, - Component* scheme) { - // Skip leading whitespace and control characters. - int begin = 0; - while (begin < url_len && ShouldTrimFromURL(url[begin])) - begin++; - if (begin == url_len) - return false; // Input is empty or all whitespace. - - // Find the first colon character. - for (int i = begin; i < url_len; i++) { - if (url[i] == ':') { - *scheme = MakeRange(begin, i); - return true; - } - } - return false; // No colon found: no scheme -} - -// Fills in all members of the Parsed structure except for the scheme. -// -// |spec| is the full spec being parsed, of length |spec_len|. -// |after_scheme| is the character immediately following the scheme (after the -// colon) where we'll begin parsing. -// -// Compatability data points. I list "host", "path" extracted: -// Input IE6 Firefox Us -// ----- -------------- -------------- -------------- -// http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" -// http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" -// http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/" -// http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/" -// http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" -// -// (*) Interestingly, although IE fails to load these URLs, its history -// canonicalizer handles them, meaning if you've been to the corresponding -// "http://foo.com/" link, it will be colored. -template -void DoParseAfterScheme(const CHAR* spec, - int spec_len, - int after_scheme, - Parsed* parsed) { - int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); - int after_slashes = after_scheme + num_slashes; - - // First split into two main parts, the authority (username, password, host, - // and port) and the full path (path, query, and reference). - Component authority; - Component full_path; - - // Found "//", looks like an authority section. Treat everything - // from there to the next slash (or end of spec) to be the authority. Note - // that we ignore the number of slashes and treat it as the authority. - int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len); - authority = Component(after_slashes, end_auth - after_slashes); - - if (end_auth == spec_len) // No beginning of path found. - full_path = Component(); - else // Everything starting from the slash to the end is the path. - full_path = Component(end_auth, spec_len - end_auth); - - // Now parse those two sub-parts. - DoParseAuthority(spec, authority, &parsed->username, &parsed->password, - &parsed->host, &parsed->port); - ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref); -} - -// The main parsing function for standard URLs. Standard URLs have a scheme, -// host, path, etc. -template -void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) { - DCHECK(spec_len >= 0); - - // Strip leading & trailing spaces and control characters. - int begin = 0; - TrimURL(spec, &begin, &spec_len); - - int after_scheme; - if (DoExtractScheme(spec, spec_len, &parsed->scheme)) { - after_scheme = parsed->scheme.end() + 1; // Skip past the colon. - } else { - // Say there's no scheme when there is no colon. We could also say that - // everything is the scheme. Both would produce an invalid URL, but this way - // seems less wrong in more cases. - parsed->scheme.reset(); - after_scheme = begin; - } - DoParseAfterScheme(spec, spec_len, after_scheme, parsed); -} - -template -void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) { - DCHECK(spec_len >= 0); - - // Get the unused parts of the URL out of the way. - parsed->username.reset(); - parsed->password.reset(); - parsed->host.reset(); - parsed->port.reset(); - parsed->path.reset(); // May use this; reset for convenience. - parsed->ref.reset(); // May use this; reset for convenience. - parsed->query.reset(); // May use this; reset for convenience. - parsed->clear_inner_parsed(); // May use this; reset for convenience. - - // Strip leading & trailing spaces and control characters. - int begin = 0; - TrimURL(spec, &begin, &spec_len); - - // Handle empty specs or ones that contain only whitespace or control chars. - if (begin == spec_len) { - parsed->scheme.reset(); - return; - } - - int inner_start = -1; - - // Extract the scheme. We also handle the case where there is no scheme. - if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { - // Offset the results since we gave ExtractScheme a substring. - parsed->scheme.begin += begin; - - if (parsed->scheme.end() == spec_len - 1) - return; - - inner_start = parsed->scheme.end() + 1; - } else { - // No scheme found; that's not valid for filesystem URLs. - parsed->scheme.reset(); - return; - } - - url_parse::Component inner_scheme; - const CHAR* inner_spec = &spec[inner_start]; - int inner_spec_len = spec_len - inner_start; - - if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) { - // Offset the results since we gave ExtractScheme a substring. - inner_scheme.begin += inner_start; - - if (inner_scheme.end() == spec_len - 1) - return; - } else { - // No scheme found; that's not valid for filesystem URLs. - // The best we can do is return "filesystem://". - return; - } - - Parsed inner_parsed; - - if (url_util::CompareSchemeComponent( - spec, inner_scheme, url_util::kFileScheme)) { - // File URLs are special. - ParseFileURL(inner_spec, inner_spec_len, &inner_parsed); - } else if (url_util::CompareSchemeComponent(spec, inner_scheme, - url_util::kFileSystemScheme)) { - // Filesystem URLs don't nest. - return; - } else if (url_util::IsStandard(spec, inner_scheme)) { - // All "normal" URLs. - DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed); - } else { - return; - } - - // All members of inner_parsed need to be offset by inner_start. - // If we had any scheme that supported nesting more than one level deep, - // we'd have to recurse into the inner_parsed's inner_parsed when - // adjusting by inner_start. - inner_parsed.scheme.begin += inner_start; - inner_parsed.username.begin += inner_start; - inner_parsed.password.begin += inner_start; - inner_parsed.host.begin += inner_start; - inner_parsed.port.begin += inner_start; - inner_parsed.query.begin += inner_start; - inner_parsed.ref.begin += inner_start; - inner_parsed.path.begin += inner_start; - - // Query and ref move from inner_parsed to parsed. - parsed->query = inner_parsed.query; - inner_parsed.query.reset(); - parsed->ref = inner_parsed.ref; - inner_parsed.ref.reset(); - - parsed->set_inner_parsed(inner_parsed); - if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() || - inner_parsed.inner_parsed()) { - return; - } - - // The path in inner_parsed should start with a slash, then have a filesystem - // type followed by a slash. From the first slash up to but excluding the - // second should be what it keeps; the rest goes to parsed. If the path ends - // before the second slash, it's still pretty clear what the user meant, so - // we'll let that through. - if (!IsURLSlash(spec[inner_parsed.path.begin])) { - return; - } - int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash - while (inner_path_end < spec_len && - !IsURLSlash(spec[inner_path_end])) - ++inner_path_end; - parsed->path.begin = inner_path_end; - int new_inner_path_length = inner_path_end - inner_parsed.path.begin; - parsed->path.len = inner_parsed.path.len - new_inner_path_length; - parsed->inner_parsed()->path.len = new_inner_path_length; -} - -// Initializes a path URL which is merely a scheme followed by a path. Examples -// include "about:foo" and "javascript:alert('bar');" -template -void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) { - // Get the non-path and non-scheme parts of the URL out of the way, we never - // use them. - parsed->username.reset(); - parsed->password.reset(); - parsed->host.reset(); - parsed->port.reset(); - parsed->query.reset(); - parsed->ref.reset(); - - // Strip leading & trailing spaces and control characters. - int begin = 0; - TrimURL(spec, &begin, &spec_len); - - // Handle empty specs or ones that contain only whitespace or control chars. - if (begin == spec_len) { - parsed->scheme.reset(); - parsed->path.reset(); - return; - } - - // Extract the scheme, with the path being everything following. We also - // handle the case where there is no scheme. - if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { - // Offset the results since we gave ExtractScheme a substring. - parsed->scheme.begin += begin; - - // For compatability with the standard URL parser, we treat no path as - // -1, rather than having a length of 0 (we normally wouldn't care so - // much for these non-standard URLs). - if (parsed->scheme.end() == spec_len - 1) - parsed->path.reset(); - else - parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len); - } else { - // No scheme found, just path. - parsed->scheme.reset(); - parsed->path = MakeRange(begin, spec_len); - } -} - -template -void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) { - DCHECK(spec_len >= 0); - - // Get the non-path and non-scheme parts of the URL out of the way, we never - // use them. - parsed->username.reset(); - parsed->password.reset(); - parsed->host.reset(); - parsed->port.reset(); - parsed->ref.reset(); - parsed->query.reset(); // May use this; reset for convenience. - - // Strip leading & trailing spaces and control characters. - int begin = 0; - TrimURL(spec, &begin, &spec_len); - - // Handle empty specs or ones that contain only whitespace or control chars. - if (begin == spec_len) { - parsed->scheme.reset(); - parsed->path.reset(); - return; - } - - int path_begin = -1; - int path_end = -1; - - // Extract the scheme, with the path being everything following. We also - // handle the case where there is no scheme. - if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { - // Offset the results since we gave ExtractScheme a substring. - parsed->scheme.begin += begin; - - if (parsed->scheme.end() != spec_len - 1) { - path_begin = parsed->scheme.end() + 1; - path_end = spec_len; - } - } else { - // No scheme found, just path. - parsed->scheme.reset(); - path_begin = begin; - path_end = spec_len; - } - - // Split [path_begin, path_end) into a path + query. - for (int i = path_begin; i < path_end; ++i) { - if (spec[i] == '?') { - parsed->query = MakeRange(i + 1, path_end); - path_end = i; - break; - } - } - - // For compatability with the standard URL parser, treat no path as - // -1, rather than having a length of 0 - if (path_begin == path_end) { - parsed->path.reset(); - } else { - parsed->path = MakeRange(path_begin, path_end); - } -} - -// Converts a port number in a string to an integer. We'd like to just call -// sscanf but our input is not NULL-terminated, which sscanf requires. Instead, -// we copy the digits to a small stack buffer (since we know the maximum number -// of digits in a valid port number) that we can NULL terminate. -template -int DoParsePort(const CHAR* spec, const Component& component) { - // Easy success case when there is no port. - const int kMaxDigits = 5; - if (!component.is_nonempty()) - return PORT_UNSPECIFIED; - - // Skip over any leading 0s. - Component digits_comp(component.end(), 0); - for (int i = 0; i < component.len; i++) { - if (spec[component.begin + i] != '0') { - digits_comp = MakeRange(component.begin + i, component.end()); - break; - } - } - if (digits_comp.len == 0) - return 0; // All digits were 0. - - // Verify we don't have too many digits (we'll be copying to our buffer so - // we need to double-check). - if (digits_comp.len > kMaxDigits) - return PORT_INVALID; - - // Copy valid digits to the buffer. - char digits[kMaxDigits + 1]; // +1 for null terminator - for (int i = 0; i < digits_comp.len; i++) { - CHAR ch = spec[digits_comp.begin + i]; - if (!IsPortDigit(ch)) { - // Invalid port digit, fail. - return PORT_INVALID; - } - digits[i] = static_cast(ch); - } - - // Null-terminate the string and convert to integer. Since we guarantee - // only digits, atoi's lack of error handling is OK. - digits[digits_comp.len] = 0; - int port = atoi(digits); - if (port > 65535) - return PORT_INVALID; // Out of range. - return port; -} - -template -void DoExtractFileName(const CHAR* spec, - const Component& path, - Component* file_name) { - // Handle empty paths: they have no file names. - if (!path.is_nonempty()) { - file_name->reset(); - return; - } - - // Search backwards for a parameter, which is a normally unused field in a - // URL delimited by a semicolon. We parse the parameter as part of the - // path, but here, we don't want to count it. The last semicolon is the - // parameter. The path should start with a slash, so we don't need to check - // the first one. - int file_end = path.end(); - for (int i = path.end() - 1; i > path.begin; i--) { - if (spec[i] == ';') { - file_end = i; - break; - } - } - - // Now search backwards from the filename end to the previous slash - // to find the beginning of the filename. - for (int i = file_end - 1; i >= path.begin; i--) { - if (IsURLSlash(spec[i])) { - // File name is everything following this character to the end - *file_name = MakeRange(i + 1, file_end); - return; - } - } - - // No slash found, this means the input was degenerate (generally paths - // will start with a slash). Let's call everything the file name. - *file_name = MakeRange(path.begin, file_end); - return; -} - -template -bool DoExtractQueryKeyValue(const CHAR* spec, - Component* query, - Component* key, - Component* value) { - if (!query->is_nonempty()) - return false; - - int start = query->begin; - int cur = start; - int end = query->end(); - - // We assume the beginning of the input is the beginning of the "key" and we - // skip to the end of it. - key->begin = cur; - while (cur < end && spec[cur] != '&' && spec[cur] != '=') - cur++; - key->len = cur - key->begin; - - // Skip the separator after the key (if any). - if (cur < end && spec[cur] == '=') - cur++; - - // Find the value part. - value->begin = cur; - while (cur < end && spec[cur] != '&') - cur++; - value->len = cur - value->begin; - - // Finally skip the next separator if any - if (cur < end && spec[cur] == '&') - cur++; - - // Save the new query - *query = url_parse::MakeRange(cur, end); - return true; -} - -} // namespace - -Parsed::Parsed() : inner_parsed_(NULL) { -} - -Parsed::Parsed(const Parsed& other) : - scheme(other.scheme), - username(other.username), - password(other.password), - host(other.host), - port(other.port), - path(other.path), - query(other.query), - ref(other.ref), - inner_parsed_(NULL) { - if (other.inner_parsed_) - set_inner_parsed(*other.inner_parsed_); -} - -Parsed& Parsed::operator=(const Parsed& other) { - if (this != &other) { - scheme = other.scheme; - username = other.username; - password = other.password; - host = other.host; - port = other.port; - path = other.path; - query = other.query; - ref = other.ref; - if (other.inner_parsed_) - set_inner_parsed(*other.inner_parsed_); - else - clear_inner_parsed(); - } - return *this; -} - -Parsed::~Parsed() { - delete inner_parsed_; -} - -int Parsed::Length() const { - if (ref.is_valid()) - return ref.end(); - return CountCharactersBefore(REF, false); -} - -int Parsed::CountCharactersBefore(ComponentType type, - bool include_delimiter) const { - if (type == SCHEME) - return scheme.begin; - - // There will be some characters after the scheme like "://" and we don't - // know how many. Search forwards for the next thing until we find one. - int cur = 0; - if (scheme.is_valid()) - cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme. - - if (username.is_valid()) { - if (type <= USERNAME) - return username.begin; - cur = username.end() + 1; // Advance over the '@' or ':' at the end. - } - - if (password.is_valid()) { - if (type <= PASSWORD) - return password.begin; - cur = password.end() + 1; // Advance over the '@' at the end. - } - - if (host.is_valid()) { - if (type <= HOST) - return host.begin; - cur = host.end(); - } - - if (port.is_valid()) { - if (type < PORT || (type == PORT && include_delimiter)) - return port.begin - 1; // Back over delimiter. - if (type == PORT) - return port.begin; // Don't want delimiter counted. - cur = port.end(); - } - - if (path.is_valid()) { - if (type <= PATH) - return path.begin; - cur = path.end(); - } - - if (query.is_valid()) { - if (type < QUERY || (type == QUERY && include_delimiter)) - return query.begin - 1; // Back over delimiter. - if (type == QUERY) - return query.begin; // Don't want delimiter counted. - cur = query.end(); - } - - if (ref.is_valid()) { - if (type == REF && !include_delimiter) - return ref.begin; // Back over delimiter. - - // When there is a ref and we get here, the component we wanted was before - // this and not found, so we always know the beginning of the ref is right. - return ref.begin - 1; // Don't want delimiter counted. - } - - return cur; -} - -bool ExtractScheme(const char* url, int url_len, Component* scheme) { - return DoExtractScheme(url, url_len, scheme); -} - -bool ExtractScheme(const char16* url, int url_len, Component* scheme) { - return DoExtractScheme(url, url_len, scheme); -} - -// This handles everything that may be an authority terminator, including -// backslash. For special backslash handling see DoParseAfterScheme. -bool IsAuthorityTerminator(char16 ch) { - return IsURLSlash(ch) || ch == '?' || ch == '#'; -} - -void ExtractFileName(const char* url, - const Component& path, - Component* file_name) { - DoExtractFileName(url, path, file_name); -} - -void ExtractFileName(const char16* url, - const Component& path, - Component* file_name) { - DoExtractFileName(url, path, file_name); -} - -bool ExtractQueryKeyValue(const char* url, - Component* query, - Component* key, - Component* value) { - return DoExtractQueryKeyValue(url, query, key, value); -} - -bool ExtractQueryKeyValue(const char16* url, - Component* query, - Component* key, - Component* value) { - return DoExtractQueryKeyValue(url, query, key, value); -} - -void ParseAuthority(const char* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num) { - DoParseAuthority(spec, auth, username, password, hostname, port_num); -} - -void ParseAuthority(const char16* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num) { - DoParseAuthority(spec, auth, username, password, hostname, port_num); -} - -int ParsePort(const char* url, const Component& port) { - return DoParsePort(url, port); -} - -int ParsePort(const char16* url, const Component& port) { - return DoParsePort(url, port); -} - -void ParseStandardURL(const char* url, int url_len, Parsed* parsed) { - DoParseStandardURL(url, url_len, parsed); -} - -void ParseStandardURL(const char16* url, int url_len, Parsed* parsed) { - DoParseStandardURL(url, url_len, parsed); -} - -void ParsePathURL(const char* url, int url_len, Parsed* parsed) { - DoParsePathURL(url, url_len, parsed); -} - -void ParsePathURL(const char16* url, int url_len, Parsed* parsed) { - DoParsePathURL(url, url_len, parsed); -} - -void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) { - DoParseFileSystemURL(url, url_len, parsed); -} - -void ParseFileSystemURL(const char16* url, int url_len, Parsed* parsed) { - DoParseFileSystemURL(url, url_len, parsed); -} - -void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) { - DoParseMailtoURL(url, url_len, parsed); -} - -void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed) { - DoParseMailtoURL(url, url_len, parsed); -} - -void ParsePathInternal(const char* spec, - const Component& path, - Component* filepath, - Component* query, - Component* ref) { - ParsePath(spec, path, filepath, query, ref); -} - -void ParsePathInternal(const char16* spec, - const Component& path, - Component* filepath, - Component* query, - Component* ref) { - ParsePath(spec, path, filepath, query, ref); -} - -void ParseAfterScheme(const char* spec, - int spec_len, - int after_scheme, - Parsed* parsed) { - DoParseAfterScheme(spec, spec_len, after_scheme, parsed); -} - -void ParseAfterScheme(const char16* spec, - int spec_len, - int after_scheme, - Parsed* parsed) { - DoParseAfterScheme(spec, spec_len, after_scheme, parsed); -} - -} // namespace url_parse diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.h.svn-base deleted file mode 100644 index 3dbe98a83..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse.h.svn-base +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLEURL_SRC_URL_PARSE_H__ -#define GOOGLEURL_SRC_URL_PARSE_H__ - -#include - -#include "base/basictypes.h" -#include "base/string16.h" -#include "googleurl/src/url_common.h" - -namespace url_parse { - -// Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and -// KURLGoogle.cpp still rely on this type. -typedef char16 UTF16Char; - -// Component ------------------------------------------------------------------ - -// Represents a substring for URL parsing. -struct Component { - Component() : begin(0), len(-1) {} - - // Normal constructor: takes an offset and a length. - Component(int b, int l) : begin(b), len(l) {} - - int end() const { - return begin + len; - } - - // Returns true if this component is valid, meaning the length is given. Even - // valid components may be empty to record the fact that they exist. - bool is_valid() const { - return (len != -1); - } - - // Returns true if the given component is specified on false, the component - // is either empty or invalid. - bool is_nonempty() const { - return (len > 0); - } - - void reset() { - begin = 0; - len = -1; - } - - bool operator==(const Component& other) const { - return begin == other.begin && len == other.len; - } - - int begin; // Byte offset in the string of this component. - int len; // Will be -1 if the component is unspecified. -}; - -// Helper that returns a component created with the given begin and ending -// points. The ending point is non-inclusive. -inline Component MakeRange(int begin, int end) { - return Component(begin, end - begin); -} - -// Parsed --------------------------------------------------------------------- - -// A structure that holds the identified parts of an input URL. This structure -// does NOT store the URL itself. The caller will have to store the URL text -// and its corresponding Parsed structure separately. -// -// Typical usage would be: -// -// url_parse::Parsed parsed; -// url_parse::Component scheme; -// if (!url_parse::ExtractScheme(url, url_len, &scheme)) -// return I_CAN_NOT_FIND_THE_SCHEME_DUDE; -// -// if (IsStandardScheme(url, scheme)) // Not provided by this component -// url_parseParseStandardURL(url, url_len, &parsed); -// else if (IsFileURL(url, scheme)) // Not provided by this component -// url_parse::ParseFileURL(url, url_len, &parsed); -// else -// url_parse::ParsePathURL(url, url_len, &parsed); -// -struct Parsed { - // Identifies different components. - enum ComponentType { - SCHEME, - USERNAME, - PASSWORD, - HOST, - PORT, - PATH, - QUERY, - REF, - }; - - // The default constructor is sufficient for the components, but inner_parsed_ - // requires special handling. - GURL_API Parsed(); - GURL_API Parsed(const Parsed&); - GURL_API Parsed& operator=(const Parsed&); - GURL_API ~Parsed(); - - // Returns the length of the URL (the end of the last component). - // - // Note that for some invalid, non-canonical URLs, this may not be the length - // of the string. For example "http://": the parsed structure will only - // contain an entry for the four-character scheme, and it doesn't know about - // the "://". For all other last-components, it will return the real length. - GURL_API int Length() const; - - // Returns the number of characters before the given component if it exists, - // or where the component would be if it did exist. This will return the - // string length if the component would be appended to the end. - // - // Note that this can get a little funny for the port, query, and ref - // components which have a delimiter that is not counted as part of the - // component. The |include_delimiter| flag controls if you want this counted - // as part of the component or not when the component exists. - // - // This example shows the difference between the two flags for two of these - // delimited components that is present (the port and query) and one that - // isn't (the reference). The components that this flag affects are marked - // with a *. - // 0 1 2 - // 012345678901234567890 - // Example input: http://foo:80/?query - // include_delim=true, ...=false ("<-" indicates different) - // SCHEME: 0 0 - // USERNAME: 5 5 - // PASSWORD: 5 5 - // HOST: 7 7 - // *PORT: 10 11 <- - // PATH: 13 13 - // *QUERY: 14 15 <- - // *REF: 20 20 - // - GURL_API int CountCharactersBefore(ComponentType type, - bool include_delimiter) const; - - // Scheme without the colon: "http://foo"/ would have a scheme of "http". - // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there - // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed - // to start at the beginning of the string if there are preceeding whitespace - // or control characters. - Component scheme; - - // Username. Specified in URLs with an @ sign before the host. See |password| - Component username; - - // Password. The length will be -1 if unspecified, 0 if specified but empty. - // Not all URLs with a username have a password, as in "http://me@host/". - // The password is separated form the username with a colon, as in - // "http://me:secret@host/" - Component password; - - // Host name. - Component host; - - // Port number. - Component port; - - // Path, this is everything following the host name. Length will be -1 if - // unspecified. This includes the preceeding slash, so the path on - // http://www.google.com/asdf" is "/asdf". As a result, it is impossible to - // have a 0 length path, it will be -1 in cases like "http://host?foo". - // Note that we treat backslashes the same as slashes. - Component path; - - // Stuff between the ? and the # after the path. This does not include the - // preceeding ? character. Length will be -1 if unspecified, 0 if there is - // a question mark but no query string. - Component query; - - // Indicated by a #, this is everything following the hash sign (not - // including it). If there are multiple hash signs, we'll use the last one. - // Length will be -1 if there is no hash sign, or 0 if there is one but - // nothing follows it. - Component ref; - - // This is used for nested URL types, currently only filesystem. If you - // parse a filesystem URL, the resulting Parsed will have a nested - // inner_parsed_ to hold the parsed inner URL's component information. - // For all other url types [including the inner URL], it will be NULL. - Parsed* inner_parsed() const { - return inner_parsed_; - } - - void set_inner_parsed(const Parsed& inner_parsed) { - if (!inner_parsed_) - inner_parsed_ = new Parsed(inner_parsed); - else - *inner_parsed_ = inner_parsed; - } - - void clear_inner_parsed() { - if (inner_parsed_) { - delete inner_parsed_; - inner_parsed_ = NULL; - } - } - - private: - Parsed* inner_parsed_; // This object is owned and managed by this struct. -}; - -// Initialization functions --------------------------------------------------- -// -// These functions parse the given URL, filling in all of the structure's -// components. These functions can not fail, they will always do their best -// at interpreting the input given. -// -// The string length of the URL MUST be specified, we do not check for NULLs -// at any point in the process, and will actually handle embedded NULLs. -// -// IMPORTANT: These functions do NOT hang on to the given pointer or copy it -// in any way. See the comment above the struct. -// -// The 8-bit versions require UTF-8 encoding. - -// StandardURL is for when the scheme is known to be one that has an -// authority (host) like "http". This function will not handle weird ones -// like "about:" and "javascript:", or do the right thing for "file:" URLs. -GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed); -GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); - -// PathURL is for when the scheme is known not to have an authority (host) -// section but that aren't file URLs either. The scheme is parsed, and -// everything after the scheme is considered as the path. This is used for -// things like "about:" and "javascript:" -GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed); -GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed); - -// FileURL is for file URLs. There are some special rules for interpreting -// these. -GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed); -GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed); - -// Filesystem URLs are structured differently than other URLs. -GURL_API void ParseFileSystemURL(const char* url, - int url_len, - Parsed* parsed); -GURL_API void ParseFileSystemURL(const char16* url, - int url_len, - Parsed* parsed); - -// MailtoURL is for mailto: urls. They are made up scheme,path,query -GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); -GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); - -// Helper functions ----------------------------------------------------------- - -// Locates the scheme according to the URL parser's rules. This function is -// designed so the caller can find the scheme and call the correct Init* -// function according to their known scheme types. -// -// It also does not perform any validation on the scheme. -// -// This function will return true if the scheme is found and will put the -// scheme's range into *scheme. False means no scheme could be found. Note -// that a URL beginning with a colon has a scheme, but it is empty, so this -// function will return true but *scheme will = (0,0). -// -// The scheme is found by skipping spaces and control characters at the -// beginning, and taking everything from there to the first colon to be the -// scheme. The character at scheme.end() will be the colon (we may enhance -// this to handle full width colons or something, so don't count on the -// actual character value). The character at scheme.end()+1 will be the -// beginning of the rest of the URL, be it the authority or the path (or the -// end of the string). -// -// The 8-bit version requires UTF-8 encoding. -GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme); -GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme); - -// Returns true if ch is a character that terminates the authority segment -// of a URL. -GURL_API bool IsAuthorityTerminator(char16 ch); - -// Does a best effort parse of input |spec|, in range |auth|. If a particular -// component is not found, it will be set to invalid. -GURL_API void ParseAuthority(const char* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num); -GURL_API void ParseAuthority(const char16* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num); - -// Computes the integer port value from the given port component. The port -// component should have been identified by one of the init functions on -// |Parsed| for the given input url. -// -// The return value will be a positive integer between 0 and 64K, or one of -// the two special values below. -enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; -GURL_API int ParsePort(const char* url, const Component& port); -GURL_API int ParsePort(const char16* url, const Component& port); - -// Extracts the range of the file name in the given url. The path must -// already have been computed by the parse function, and the matching URL -// and extracted path are provided to this function. The filename is -// defined as being everything from the last slash/backslash of the path -// to the end of the path. -// -// The file name will be empty if the path is empty or there is nothing -// following the last slash. -// -// The 8-bit version requires UTF-8 encoding. -GURL_API void ExtractFileName(const char* url, - const Component& path, - Component* file_name); -GURL_API void ExtractFileName(const char16* url, - const Component& path, - Component* file_name); - -// Extract the first key/value from the range defined by |*query|. Updates -// |*query| to start at the end of the extracted key/value pair. This is -// designed for use in a loop: you can keep calling it with the same query -// object and it will iterate over all items in the query. -// -// Some key/value pairs may have the key, the value, or both be empty (for -// example, the query string "?&"). These will be returned. Note that an empty -// last parameter "foo.com?" or foo.com?a&" will not be returned, this case -// is the same as "done." -// -// The initial query component should not include the '?' (this is the default -// for parsed URLs). -// -// If no key/value are found |*key| and |*value| will be unchanged and it will -// return false. -GURL_API bool ExtractQueryKeyValue(const char* url, - Component* query, - Component* key, - Component* value); -GURL_API bool ExtractQueryKeyValue(const char16* url, - Component* query, - Component* key, - Component* value); - -} // namespace url_parse - -#endif // GOOGLEURL_SRC_URL_PARSE_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_file.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_file.cc.svn-base deleted file mode 100644 index 2e8429f51..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_file.cc.svn-base +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "base/logging.h" -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_parse.h" -#include "googleurl/src/url_parse_internal.h" - -// Interesting IE file:isms... -// -// INPUT OUTPUT -// ========================= ============================== -// file:/foo/bar file:///foo/bar -// The result here seems totally invalid!?!? This isn't UNC. -// -// file:/ -// file:// or any other number of slashes -// IE6 doesn't do anything at all if you click on this link. No error: -// nothing. IE6's history system seems to always color this link, so I'm -// guessing that it maps internally to the empty URL. -// -// C:\ file:///C:/ -// When on a file: URL source page, this link will work. When over HTTP, -// the file: URL will appear in the status bar but the link will not work -// (security restriction for all file URLs). -// -// file:foo/ file:foo/ (invalid?!?!?) -// file:/foo/ file:///foo/ (invalid?!?!?) -// file://foo/ file://foo/ (UNC to server "foo") -// file:///foo/ file:///foo/ (invalid, seems to be a file) -// file:////foo/ file://foo/ (UNC to server "foo") -// Any more than four slashes is also treated as UNC. -// -// file:C:/ file://C:/ -// file:/C:/ file://C:/ -// The number of slashes after "file:" don't matter if the thing following -// it looks like an absolute drive path. Also, slashes and backslashes are -// equally valid here. - -namespace url_parse { - -namespace { - -// A subcomponent of DoInitFileURL, the input of this function should be a UNC -// path name, with the index of the first character after the slashes following -// the scheme given in |after_slashes|. This will initialize the host, path, -// query, and ref, and leave the other output components untouched -// (DoInitFileURL handles these for us). -template -void DoParseUNC(const CHAR* spec, - int after_slashes, - int spec_len, - Parsed* parsed) { - int next_slash = FindNextSlash(spec, after_slashes, spec_len); - if (next_slash == spec_len) { - // No additional slash found, as in "file://foo", treat the text as the - // host with no path (this will end up being UNC to server "foo"). - int host_len = spec_len - after_slashes; - if (host_len) - parsed->host = Component(after_slashes, host_len); - else - parsed->host.reset(); - parsed->path.reset(); - return; - } - -#ifdef WIN32 - // See if we have something that looks like a path following the first - // component. As in "file://localhost/c:/", we get "c:/" out. We want to - // treat this as a having no host but the path given. Works on Windows only. - if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) { - parsed->host.reset(); - ParsePathInternal(spec, MakeRange(next_slash, spec_len), - &parsed->path, &parsed->query, &parsed->ref); - return; - } -#endif - - // Otherwise, everything up until that first slash we found is the host name, - // which will end up being the UNC host. For example "file://foo/bar.txt" - // will get a server name of "foo" and a path of "/bar". Later, on Windows, - // this should be treated as the filename "\\foo\bar.txt" in proper UNC - // notation. - int host_len = next_slash - after_slashes; - if (host_len) - parsed->host = MakeRange(after_slashes, next_slash); - else - parsed->host.reset(); - if (next_slash < spec_len) { - ParsePathInternal(spec, MakeRange(next_slash, spec_len), - &parsed->path, &parsed->query, &parsed->ref); - } else { - parsed->path.reset(); - } -} - -// A subcomponent of DoParseFileURL, the input should be a local file, with the -// beginning of the path indicated by the index in |path_begin|. This will -// initialize the host, path, query, and ref, and leave the other output -// components untouched (DoInitFileURL handles these for us). -template -void DoParseLocalFile(const CHAR* spec, - int path_begin, - int spec_len, - Parsed* parsed) { - parsed->host.reset(); - ParsePathInternal(spec, MakeRange(path_begin, spec_len), - &parsed->path, &parsed->query, &parsed->ref); -} - -// Backend for the external functions that operates on either char type. -// We are handed the character after the "file:" at the beginning of the spec. -// Usually this is a slash, but needn't be; we allow paths like "file:c:\foo". -template -void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) { - DCHECK(spec_len >= 0); - - // Get the parts we never use for file URLs out of the way. - parsed->username.reset(); - parsed->password.reset(); - parsed->port.reset(); - - // Many of the code paths don't set these, so it's convenient to just clear - // them. We'll write them in those cases we need them. - parsed->query.reset(); - parsed->ref.reset(); - - // Strip leading & trailing spaces and control characters. - int begin = 0; - TrimURL(spec, &begin, &spec_len); - - // Find the scheme. - int num_slashes; - int after_scheme; - int after_slashes; -#ifdef WIN32 - // See how many slashes there are. We want to handle cases like UNC but also - // "/c:/foo". This is when there is no scheme, so we can allow pages to do - // links like "c:/foo/bar" or "//foo/bar". This is also called by the - // relative URL resolver when it determines there is an absolute URL, which - // may give us input like "/c:/foo". - num_slashes = CountConsecutiveSlashes(spec, begin, spec_len); - after_slashes = begin + num_slashes; - if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) { - // Windows path, don't try to extract the scheme (for example, "c:\foo"). - parsed->scheme.reset(); - after_scheme = after_slashes; - } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) { - // Windows UNC path: don't try to extract the scheme, but keep the slashes. - parsed->scheme.reset(); - after_scheme = begin; - } else -#endif - { - if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { - // Offset the results since we gave ExtractScheme a substring. - parsed->scheme.begin += begin; - after_scheme = parsed->scheme.end() + 1; - } else { - // No scheme found, remember that. - parsed->scheme.reset(); - after_scheme = begin; - } - } - - // Handle empty specs ones that contain only whitespace or control chars, - // or that are just the scheme (for example "file:"). - if (after_scheme == spec_len) { - parsed->host.reset(); - parsed->path.reset(); - return; - } - - num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); - - after_slashes = after_scheme + num_slashes; -#ifdef WIN32 - // Check whether the input is a drive again. We checked above for windows - // drive specs, but that's only at the very beginning to see if we have a - // scheme at all. This test will be duplicated in that case, but will - // additionally handle all cases with a real scheme such as "file:///C:/". - if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) && - num_slashes != 3) { - // Anything not beginning with a drive spec ("c:\") on Windows is treated - // as UNC, with the exception of three slashes which always means a file. - // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails. - DoParseUNC(spec, after_slashes, spec_len, parsed); - return; - } -#else - // file: URL with exactly 2 slashes is considered to have a host component. - if (num_slashes == 2) { - DoParseUNC(spec, after_slashes, spec_len, parsed); - return; - } -#endif // WIN32 - - // Easy and common case, the full path immediately follows the scheme - // (modulo slashes), as in "file://c:/foo". Just treat everything from - // there to the end as the path. Empty hosts have 0 length instead of -1. - // We include the last slash as part of the path if there is one. - DoParseLocalFile(spec, - num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, - spec_len, parsed); -} - -} // namespace - -void ParseFileURL(const char* url, int url_len, Parsed* parsed) { - DoParseFileURL(url, url_len, parsed); -} - -void ParseFileURL(const char16* url, int url_len, Parsed* parsed) { - DoParseFileURL(url, url_len, parsed); -} - -} // namespace url_parse diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_internal.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_internal.h.svn-base deleted file mode 100644 index 61bd0687f..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_internal.h.svn-base +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Contains common inline helper functions used by the URL parsing routines. - -#ifndef GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__ -#define GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__ - -#include "googleurl/src/url_parse.h" - -namespace url_parse { - -// We treat slashes and backslashes the same for IE compatability. -inline bool IsURLSlash(char16 ch) { - return ch == '/' || ch == '\\'; -} - -// Returns true if we should trim this character from the URL because it is a -// space or a control character. -inline bool ShouldTrimFromURL(char16 ch) { - return ch <= ' '; -} - -// Given an already-initialized begin index and length, this shrinks the range -// to eliminate "should-be-trimmed" characters. Note that the length does *not* -// indicate the length of untrimmed data from |*begin|, but rather the position -// in the input string (so the string starts at character |*begin| in the spec, -// and goes until |*len|). -template -inline void TrimURL(const CHAR* spec, int* begin, int* len) { - // Strip leading whitespace and control characters. - while (*begin < *len && ShouldTrimFromURL(spec[*begin])) - (*begin)++; - - // Strip trailing whitespace and control characters. We need the >i test for - // when the input string is all blanks; we don't want to back past the input. - while (*len > *begin && ShouldTrimFromURL(spec[*len - 1])) - (*len)--; -} - -// Counts the number of consecutive slashes starting at the given offset -// in the given string of the given length. -template -inline int CountConsecutiveSlashes(const CHAR *str, - int begin_offset, int str_len) { - int count = 0; - while (begin_offset + count < str_len && - IsURLSlash(str[begin_offset + count])) - ++count; - return count; -} - -// Internal functions in url_parse.cc that parse the path, that is, everything -// following the authority section. The input is the range of everything -// following the authority section, and the output is the identified ranges. -// -// This is designed for the file URL parser or other consumers who may do -// special stuff at the beginning, but want regular path parsing, it just -// maps to the internal parsing function for paths. -void ParsePathInternal(const char* spec, - const Component& path, - Component* filepath, - Component* query, - Component* ref); -void ParsePathInternal(const char16* spec, - const Component& path, - Component* filepath, - Component* query, - Component* ref); - - -// Given a spec and a pointer to the character after the colon following the -// scheme, this parses it and fills in the structure, Every item in the parsed -// structure is filled EXCEPT for the scheme, which is untouched. -void ParseAfterScheme(const char* spec, - int spec_len, - int after_scheme, - Parsed* parsed); -void ParseAfterScheme(const char16* spec, - int spec_len, - int after_scheme, - Parsed* parsed); - -} // namespace url_parse - -#endif // GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_unittest.cc.svn-base deleted file mode 100644 index cc3eb1b63..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_parse_unittest.cc.svn-base +++ /dev/null @@ -1,649 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "base/basictypes.h" -#include "googleurl/src/url_parse.h" -#include "testing/gtest/include/gtest/gtest.h" - -// Some implementations of base/basictypes.h may define ARRAYSIZE. -// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro -// which is in our version of basictypes.h. -#ifndef ARRAYSIZE -#define ARRAYSIZE ARRAYSIZE_UNSAFE -#endif - -// Interesting IE file:isms... -// -// file:/foo/bar file:///foo/bar -// The result here seems totally invalid!?!? This isn't UNC. -// -// file:/ -// file:// or any other number of slashes -// IE6 doesn't do anything at all if you click on this link. No error: -// nothing. IE6's history system seems to always color this link, so I'm -// guessing that it maps internally to the empty URL. -// -// C:\ file:///C:/ -// / file:///C:/ -// /foo file:///C:/foo -// Interestingly, IE treats "/" as an alias for "c:\", which makes sense, -// but is weird to think about on Windows. -// -// file:foo/ file:foo/ (invalid?!?!?) -// file:/foo/ file:///foo/ (invalid?!?!?) -// file://foo/ file://foo/ (UNC to server "foo") -// file:///foo/ file:///foo/ (invalid) -// file:////foo/ file://foo/ (UNC to server "foo") -// Any more than four slashes is also treated as UNC. -// -// file:C:/ file://C:/ -// file:/C:/ file://C:/ -// The number of slashes after "file:" don't matter if the thing following -// it looks like an absolute drive path. Also, slashes and backslashes are -// equally valid here. - -namespace { - -// Used for regular URL parse cases. -struct URLParseCase { - const char* input; - - const char* scheme; - const char* username; - const char* password; - const char* host; - int port; - const char* path; - const char* query; - const char* ref; -}; - -// Simpler version of URLParseCase for testing path URLs. -struct PathURLParseCase { - const char* input; - - const char* scheme; - const char* path; -}; - -// Simpler version of URLParseCase for testing mailto URLs. -struct MailtoURLParseCase { - const char* input; - - const char* scheme; - const char* path; - const char* query; -}; - -// More complicated version of URLParseCase for testing filesystem URLs. -struct FileSystemURLParseCase { - const char* input; - - const char* inner_scheme; - const char* inner_username; - const char* inner_password; - const char* inner_host; - int inner_port; - const char* inner_path; - const char* path; - const char* query; - const char* ref; -}; - -bool ComponentMatches(const char* input, - const char* reference, - const url_parse::Component& component) { - // If the component is nonexistant (length == -1), it should begin at 0. - EXPECT_TRUE(component.len >= 0 || component.len == -1); - - // Begin should be valid. - EXPECT_LE(0, component.begin); - - // A NULL reference means the component should be nonexistant. - if (!reference) - return component.len == -1; - if (component.len < 0) - return false; // Reference is not NULL but we don't have anything - - if (strlen(reference) != static_cast(component.len)) - return false; // Lengths don't match - - // Now check the actual characters. - return strncmp(reference, &input[component.begin], component.len) == 0; -} - -void ExpectInvalidComponent(const url_parse::Component& component) { - EXPECT_EQ(0, component.begin); - EXPECT_EQ(-1, component.len); -} - -} // namespace - -// Parsed ---------------------------------------------------------------------- - -TEST(URLParser, Length) { - const char* length_cases[] = { - // One with everything in it. - "http://user:pass@host:99/foo?bar#baz", - // One with nothing in it. - "", - // Working backwards, let's start taking off stuff from the full one. - "http://user:pass@host:99/foo?bar#", - "http://user:pass@host:99/foo?bar", - "http://user:pass@host:99/foo?", - "http://user:pass@host:99/foo", - "http://user:pass@host:99/", - "http://user:pass@host:99", - "http://user:pass@host:", - "http://user:pass@host", - "http://host", - "http://user@", - "http:", - }; - for (size_t i = 0; i < arraysize(length_cases); i++) { - int true_length = static_cast(strlen(length_cases[i])); - - url_parse::Parsed parsed; - url_parse::ParseStandardURL(length_cases[i], true_length, &parsed); - - EXPECT_EQ(true_length, parsed.Length()); - } -} - -TEST(URLParser, CountCharactersBefore) { - using namespace url_parse; - struct CountCase { - const char* url; - Parsed::ComponentType component; - bool include_delimiter; - int expected_count; - } count_cases[] = { - // Test each possibility in the case where all components are present. -// 0 1 2 -// 0123456789012345678901 - {"http://u:p@h:8/p?q#r", Parsed::SCHEME, true, 0}, - {"http://u:p@h:8/p?q#r", Parsed::SCHEME, false, 0}, - {"http://u:p@h:8/p?q#r", Parsed::USERNAME, true, 7}, - {"http://u:p@h:8/p?q#r", Parsed::USERNAME, false, 7}, - {"http://u:p@h:8/p?q#r", Parsed::PASSWORD, true, 9}, - {"http://u:p@h:8/p?q#r", Parsed::PASSWORD, false, 9}, - {"http://u:p@h:8/p?q#r", Parsed::HOST, true, 11}, - {"http://u:p@h:8/p?q#r", Parsed::HOST, false, 11}, - {"http://u:p@h:8/p?q#r", Parsed::PORT, true, 12}, - {"http://u:p@h:8/p?q#r", Parsed::PORT, false, 13}, - {"http://u:p@h:8/p?q#r", Parsed::PATH, false, 14}, - {"http://u:p@h:8/p?q#r", Parsed::PATH, true, 14}, - {"http://u:p@h:8/p?q#r", Parsed::QUERY, true, 16}, - {"http://u:p@h:8/p?q#r", Parsed::QUERY, false, 17}, - {"http://u:p@h:8/p?q#r", Parsed::REF, true, 18}, - {"http://u:p@h:8/p?q#r", Parsed::REF, false, 19}, - // Now test when the requested component is missing. - {"http://u:p@h:8/p?", Parsed::REF, true, 17}, - {"http://u:p@h:8/p?q", Parsed::REF, true, 18}, - {"http://u:p@h:8/p#r", Parsed::QUERY, true, 16}, - {"http://u:p@h:8#r", Parsed::PATH, true, 14}, - {"http://u:p@h/", Parsed::PORT, true, 12}, - {"http://u:p@/", Parsed::HOST, true, 11}, - // This case is a little weird. It will report that the password would - // start where the host begins. This is arguably correct, although you - // could also argue that it should start at the '@' sign. Doing it - // starting with the '@' sign is actually harder, so we don't bother. - {"http://u@h/", Parsed::PASSWORD, true, 9}, - {"http://h/", Parsed::USERNAME, true, 7}, - {"http:", Parsed::USERNAME, true, 5}, - {"", Parsed::SCHEME, true, 0}, - // Make sure a random component still works when there's nothing there. - {"", Parsed::REF, true, 0}, - // File URLs are special with no host, so we test those. - {"file:///c:/foo", Parsed::USERNAME, true, 7}, - {"file:///c:/foo", Parsed::PASSWORD, true, 7}, - {"file:///c:/foo", Parsed::HOST, true, 7}, - {"file:///c:/foo", Parsed::PATH, true, 7}, - }; - for (size_t i = 0; i < ARRAYSIZE(count_cases); i++) { - int length = static_cast(strlen(count_cases[i].url)); - - // Simple test to distinguish file and standard URLs. - url_parse::Parsed parsed; - if (length > 0 && count_cases[i].url[0] == 'f') - url_parse::ParseFileURL(count_cases[i].url, length, &parsed); - else - url_parse::ParseStandardURL(count_cases[i].url, length, &parsed); - - int chars_before = parsed.CountCharactersBefore( - count_cases[i].component, count_cases[i].include_delimiter); - EXPECT_EQ(count_cases[i].expected_count, chars_before); - } -} - -// Standard -------------------------------------------------------------------- - -// Input Scheme Usrname Passwd Host Port Path Query Ref -// ------------------------------------ ------- ------- ---------- ------------ --- ---------- ------------ ----- -static URLParseCase cases[] = { - // Regular URL with all the parts -{"http://user:pass@foo:21/bar;par?b#c", "http", "user", "pass", "foo", 21, "/bar;par","b", "c"}, - - // Known schemes should lean towards authority identification -{"http:foo.com", "http", NULL, NULL, "foo.com", -1, NULL, NULL, NULL}, - - // Spaces! -{"\t :foo.com \n", "", NULL, NULL, "foo.com", -1, NULL, NULL, NULL}, -{" foo.com ", NULL, NULL, NULL, "foo.com", -1, NULL, NULL, NULL}, -{"a:\t foo.com", "a", NULL, NULL, "\t foo.com", -1, NULL, NULL, NULL}, -{"http://f:21/ b ? d # e ", "http", NULL, NULL, "f", 21, "/ b ", " d ", " e"}, - - // Invalid port numbers should be identified and turned into -2, empty port - // numbers should be -1. Spaces aren't allowed in port numbers -{"http://f:/c", "http", NULL, NULL, "f", -1, "/c", NULL, NULL}, -{"http://f:0/c", "http", NULL, NULL, "f", 0, "/c", NULL, NULL}, -{"http://f:00000000000000/c", "http", NULL, NULL, "f", 0, "/c", NULL, NULL}, -{"http://f:00000000000000000000080/c", "http", NULL, NULL, "f", 80, "/c", NULL, NULL}, -{"http://f:b/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f: /c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f:\n/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f:fifty-two/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f:999999/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f: 21 / b ? d # e ", "http", NULL, NULL, "f", -2, "/ b ", " d ", " e"}, - - // Creative URLs missing key elements -{"", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{" \t", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":foo.com/", "", NULL, NULL, "foo.com", -1, "/", NULL, NULL}, -{":foo.com\\", "", NULL, NULL, "foo.com", -1, "\\", NULL, NULL}, -{":", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":a", "", NULL, NULL, "a", -1, NULL, NULL, NULL}, -{":/", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":\\", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":#", "", NULL, NULL, NULL, -1, NULL, NULL, ""}, -{"#", NULL, NULL, NULL, NULL, -1, NULL, NULL, ""}, -{"#/", NULL, NULL, NULL, NULL, -1, NULL, NULL, "/"}, -{"#\\", NULL, NULL, NULL, NULL, -1, NULL, NULL, "\\"}, -{"#;?", NULL, NULL, NULL, NULL, -1, NULL, NULL, ";?"}, -{"?", NULL, NULL, NULL, NULL, -1, NULL, "", NULL}, -{"/", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":23", "", NULL, NULL, "23", -1, NULL, NULL, NULL}, -{"/:23", "/", NULL, NULL, "23", -1, NULL, NULL, NULL}, -{"//", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"::", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"::23", "", NULL, NULL, NULL, 23, NULL, NULL, NULL}, -{"foo://", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL}, - - // Username/passwords and things that look like them -{"http://a:b@c:29/d", "http", "a", "b", "c", 29, "/d", NULL, NULL}, -{"http::@c:29", "http", "", "", "c", 29, NULL, NULL, NULL}, - // ... "]" in the password field isn't allowed, but we tolerate it here... -{"http://&a:foo(b]c@d:2/", "http", "&a", "foo(b]c", "d", 2, "/", NULL, NULL}, -{"http://::@c@d:2", "http", "", ":@c", "d", 2, NULL, NULL, NULL}, -{"http://foo.com:b@d/", "http", "foo.com", "b", "d", -1, "/", NULL, NULL}, - -{"http://foo.com/\\@", "http", NULL, NULL, "foo.com", -1, "/\\@", NULL, NULL}, -{"http:\\\\foo.com\\", "http", NULL, NULL, "foo.com", -1, "\\", NULL, NULL}, -{"http:\\\\a\\b:c\\d@foo.com\\", "http", NULL, NULL, "a", -1, "\\b:c\\d@foo.com\\", NULL, NULL}, - - // Tolerate different numbers of slashes. -{"foo:/", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"foo:/bar.com/", "foo", NULL, NULL, "bar.com", -1, "/", NULL, NULL}, -{"foo://///////", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"foo://///////bar.com/", "foo", NULL, NULL, "bar.com", -1, "/", NULL, NULL}, -{"foo:////://///", "foo", NULL, NULL, NULL, -1, "/////", NULL, NULL}, - - // Raw file paths on Windows aren't handled by the parser. -{"c:/foo", "c", NULL, NULL, "foo", -1, NULL, NULL, NULL}, -{"//foo/bar", NULL, NULL, NULL, "foo", -1, "/bar", NULL, NULL}, - - // Use the first question mark for the query and the ref. -{"http://foo/path;a??e#f#g", "http", NULL, NULL, "foo", -1, "/path;a", "?e", "f#g"}, -{"http://foo/abcd?efgh?ijkl", "http", NULL, NULL, "foo", -1, "/abcd", "efgh?ijkl", NULL}, -{"http://foo/abcd#foo?bar", "http", NULL, NULL, "foo", -1, "/abcd", NULL, "foo?bar"}, - - // IPv6, check also interesting uses of colons. -{"[61:24:74]:98", "[61", NULL, NULL, "24:74]", 98, NULL, NULL, NULL}, -{"http://[61:27]:98", "http", NULL, NULL, "[61:27]", 98, NULL, NULL, NULL}, -{"http:[61:27]/:foo", "http", NULL, NULL, "[61:27]", -1, "/:foo", NULL, NULL}, -{"http://[1::2]:3:4", "http", NULL, NULL, "[1::2]:3", 4, NULL, NULL, NULL}, - - // Partially-complete IPv6 literals, and related cases. -{"http://2001::1", "http", NULL, NULL, "2001:", 1, NULL, NULL, NULL}, -{"http://[2001::1", "http", NULL, NULL, "[2001::1", -1, NULL, NULL, NULL}, -{"http://2001::1]", "http", NULL, NULL, "2001::1]", -1, NULL, NULL, NULL}, -{"http://2001::1]:80", "http", NULL, NULL, "2001::1]", 80, NULL, NULL, NULL}, -{"http://[2001::1]", "http", NULL, NULL, "[2001::1]", -1, NULL, NULL, NULL}, -{"http://[2001::1]:80", "http", NULL, NULL, "[2001::1]", 80, NULL, NULL, NULL}, -{"http://[[::]]", "http", NULL, NULL, "[[::]]", -1, NULL, NULL, NULL}, - -}; - -TEST(URLParser, Standard) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the constructor. - url_parse::Parsed parsed; - for (size_t i = 0; i < arraysize(cases); i++) { - const char* url = cases[i].input; - url_parse::ParseStandardURL(url, static_cast(strlen(url)), &parsed); - int port = url_parse::ParsePort(url, parsed.port); - - EXPECT_TRUE(ComponentMatches(url, cases[i].scheme, parsed.scheme)); - EXPECT_TRUE(ComponentMatches(url, cases[i].username, parsed.username)); - EXPECT_TRUE(ComponentMatches(url, cases[i].password, parsed.password)); - EXPECT_TRUE(ComponentMatches(url, cases[i].host, parsed.host)); - EXPECT_EQ(cases[i].port, port); - EXPECT_TRUE(ComponentMatches(url, cases[i].path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, cases[i].query, parsed.query)); - EXPECT_TRUE(ComponentMatches(url, cases[i].ref, parsed.ref)); - } -} - -// PathURL -------------------------------------------------------------------- - -// Various incarnations of path URLs. -static PathURLParseCase path_cases[] = { -{"", NULL, NULL}, -{":", "", NULL}, -{":/", "", "/"}, -{"/", NULL, "/"}, -{" This is \\interesting// \t", NULL, "This is \\interesting//"}, -{"about:", "about", NULL}, -{"about:blank", "about", "blank"}, -{" about: blank ", "about", " blank"}, -{"javascript :alert(\"He:/l\\l#o?foo\"); ", "javascript ", "alert(\"He:/l\\l#o?foo\");"}, -}; - -TEST(URLParser, PathURL) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the construtor. - url_parse::Parsed parsed; - for (size_t i = 0; i < arraysize(path_cases); i++) { - const char* url = path_cases[i].input; - url_parse::ParsePathURL(url, static_cast(strlen(url)), &parsed); - - EXPECT_TRUE(ComponentMatches(url, path_cases[i].scheme, parsed.scheme)); - EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.path)); - - // The remaining components are never used for path urls. - ExpectInvalidComponent(parsed.username); - ExpectInvalidComponent(parsed.password); - ExpectInvalidComponent(parsed.host); - ExpectInvalidComponent(parsed.port); - ExpectInvalidComponent(parsed.query); - ExpectInvalidComponent(parsed.ref); - } -} - -#ifdef WIN32 - -// WindowsFile ---------------------------------------------------------------- - -// Various incarnations of file URLs. These are for Windows only. -static URLParseCase file_cases[] = { -{"file:server", "file", NULL, NULL, "server", -1, NULL, NULL, NULL}, -{" file: server \t", "file", NULL, NULL, " server",-1, NULL, NULL, NULL}, -{"FiLe:c|", "FiLe", NULL, NULL, NULL, -1, "c|", NULL, NULL}, -{"FILE:/\\\\/server/file", "FILE", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL}, -{"file://localhost/c:/", "file", NULL, NULL, NULL, -1, "/c:/", NULL, NULL}, -{"file://127.0.0.1/c|\\", "file", NULL, NULL, NULL, -1, "/c|\\", NULL, NULL}, -{"file:/", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL}, - // If there is a Windows drive letter, treat any number of slashes as the - // path part. -{"file:c:\\fo\\b", "file", NULL, NULL, NULL, -1, "c:\\fo\\b", NULL, NULL}, -{"file:/c:\\foo/bar", "file", NULL, NULL, NULL, -1, "/c:\\foo/bar",NULL, NULL}, -{"file://c:/f\\b", "file", NULL, NULL, NULL, -1, "/c:/f\\b", NULL, NULL}, -{"file:///C:/foo", "file", NULL, NULL, NULL, -1, "/C:/foo", NULL, NULL}, -{"file://///\\/\\/c:\\f\\b", "file", NULL, NULL, NULL, -1, "/c:\\f\\b", NULL, NULL}, - // If there is not a drive letter, we should treat is as UNC EXCEPT for - // three slashes, which we treat as a Unix style path. -{"file:server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file:/server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file://server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file:///server/file", "file", NULL, NULL, NULL, -1, "/server/file",NULL, NULL}, -{"file://\\server/file", "file", NULL, NULL, NULL, -1, "\\server/file",NULL, NULL}, -{"file:////server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, - // Queries and refs are valid for file URLs as well. -{"file:///C:/foo.html?#", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "", ""}, -{"file:///C:/foo.html?query=yes#ref", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "query=yes", "ref"}, -}; - -TEST(URLParser, WindowsFile) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the construtor. - url_parse::Parsed parsed; - for (int i = 0; i < arraysize(file_cases); i++) { - const char* url = file_cases[i].input; - url_parse::ParseFileURL(url, static_cast(strlen(url)), &parsed); - int port = url_parse::ParsePort(url, parsed.port); - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme)); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username)); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password)); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host)); - EXPECT_EQ(file_cases[i].port, port); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query)); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref)); - } -} - -#endif // WIN32 - -TEST(URLParser, ExtractFileName) { - struct FileCase { - const char* input; - const char* expected; - } file_cases[] = { - {"http://www.google.com", NULL}, - {"http://www.google.com/", ""}, - {"http://www.google.com/search", "search"}, - {"http://www.google.com/search/", ""}, - {"http://www.google.com/foo/bar.html?baz=22", "bar.html"}, - {"http://www.google.com/foo/bar.html#ref", "bar.html"}, - {"http://www.google.com/search/;param", ""}, - {"http://www.google.com/foo/bar.html;param#ref", "bar.html"}, - {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html;foo"}, - {"http://www.google.com/foo/bar.html?query#ref", "bar.html"}, - }; - - for (size_t i = 0; i < ARRAYSIZE(file_cases); i++) { - const char* url = file_cases[i].input; - int len = static_cast(strlen(url)); - - url_parse::Parsed parsed; - url_parse::ParseStandardURL(url, len, &parsed); - - url_parse::Component file_name; - url_parse::ExtractFileName(url, parsed.path, &file_name); - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].expected, file_name)); - } -} - -// Returns true if the parameter with index |parameter| in the given URL's -// query string. The expected key can be NULL to indicate no such key index -// should exist. The parameter number is 1-based. -static bool NthParameterIs(const char* url, - int parameter, - const char* expected_key, - const char* expected_value) { - url_parse::Parsed parsed; - url_parse::ParseStandardURL(url, static_cast(strlen(url)), &parsed); - - url_parse::Component query = parsed.query; - - for (int i = 1; i <= parameter; i++) { - url_parse::Component key, value; - if (!url_parse::ExtractQueryKeyValue(url, &query, &key, &value)) { - if (parameter >= i && !expected_key) - return true; // Expected nonexistant key, got one. - return false; // Not enough keys. - } - - if (i == parameter) { - if (!expected_key) - return false; - - if (strncmp(&url[key.begin], expected_key, key.len) != 0) - return false; - if (strncmp(&url[value.begin], expected_value, value.len) != 0) - return false; - return true; - } - } - return expected_key == NULL; // We didn't find that many parameters. -} - -TEST(URLParser, ExtractQueryKeyValue) { - EXPECT_TRUE(NthParameterIs("http://www.google.com", 1, NULL, NULL)); - - // Basic case. - char a[] = "http://www.google.com?arg1=1&arg2=2&bar"; - EXPECT_TRUE(NthParameterIs(a, 1, "arg1", "1")); - EXPECT_TRUE(NthParameterIs(a, 2, "arg2", "2")); - EXPECT_TRUE(NthParameterIs(a, 3, "bar", "")); - EXPECT_TRUE(NthParameterIs(a, 4, NULL, NULL)); - - // Empty param at the end. - char b[] = "http://www.google.com?foo=bar&"; - EXPECT_TRUE(NthParameterIs(b, 1, "foo", "bar")); - EXPECT_TRUE(NthParameterIs(b, 2, NULL, NULL)); - - // Empty param at the beginning. - char c[] = "http://www.google.com?&foo=bar"; - EXPECT_TRUE(NthParameterIs(c, 1, "", "")); - EXPECT_TRUE(NthParameterIs(c, 2, "foo", "bar")); - EXPECT_TRUE(NthParameterIs(c, 3, NULL, NULL)); - - // Empty key with value. - char d[] = "http://www.google.com?=foo"; - EXPECT_TRUE(NthParameterIs(d, 1, "", "foo")); - EXPECT_TRUE(NthParameterIs(d, 2, NULL, NULL)); - - // Empty value with key. - char e[] = "http://www.google.com?foo="; - EXPECT_TRUE(NthParameterIs(e, 1, "foo", "")); - EXPECT_TRUE(NthParameterIs(e, 2, NULL, NULL)); - - // Empty key and values. - char f[] = "http://www.google.com?&&==&="; - EXPECT_TRUE(NthParameterIs(f, 1, "", "")); - EXPECT_TRUE(NthParameterIs(f, 2, "", "")); - EXPECT_TRUE(NthParameterIs(f, 3, "", "=")); - EXPECT_TRUE(NthParameterIs(f, 4, "", "")); - EXPECT_TRUE(NthParameterIs(f, 5, NULL, NULL)); -} - -// MailtoURL -------------------------------------------------------------------- - -static MailtoURLParseCase mailto_cases[] = { -//|input |scheme |path |query -{"mailto:foo@gmail.com", "mailto", "foo@gmail.com", NULL}, -{" mailto: to \t", "mailto", " to", NULL}, -{"mailto:addr1%2C%20addr2 ", "mailto", "addr1%2C%20addr2", NULL}, -{"Mailto:addr1, addr2 ", "Mailto", "addr1, addr2", NULL}, -{"mailto:addr1:addr2 ", "mailto", "addr1:addr2", NULL}, -{"mailto:?to=addr1,addr2", "mailto", NULL, "to=addr1,addr2"}, -{"mailto:?to=addr1%2C%20addr2", "mailto", NULL, "to=addr1%2C%20addr2"}, -{"mailto:addr1?to=addr2", "mailto", "addr1", "to=addr2"}, -{"mailto:?body=#foobar#", "mailto", NULL, "body=#foobar#",}, -{"mailto:#?body=#foobar#", "mailto", "#", "body=#foobar#"}, -}; - -TEST(URLParser, MailtoUrl) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the construtor. - url_parse::Parsed parsed; - for (size_t i = 0; i < arraysize(mailto_cases); ++i) { - const char* url = mailto_cases[i].input; - url_parse::ParseMailtoURL(url, static_cast(strlen(url)), &parsed); - int port = url_parse::ParsePort(url, parsed.port); - - EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].scheme, parsed.scheme)); - EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query)); - EXPECT_EQ(url_parse::PORT_UNSPECIFIED, port); - - // The remaining components are never used for mailto urls. - ExpectInvalidComponent(parsed.username); - ExpectInvalidComponent(parsed.password); - ExpectInvalidComponent(parsed.port); - ExpectInvalidComponent(parsed.ref); - } -} - -// Various incarnations of filesystem URLs. -static FileSystemURLParseCase filesystem_cases[] = { - // Regular URL with all the parts -{"filesystem:http://user:pass@foo:21/temporary/bar;par?b#c", "http", "user", "pass", "foo", 21, "/temporary", "/bar;par", "b", "c"}, -{"filesystem:https://foo/persistent/bar;par/", "https", NULL, NULL, "foo", -1, "/persistent", "/bar;par/", NULL, NULL}, -{"filesystem:file:///persistent/bar;par/", "file", NULL, NULL, NULL, -1, "/persistent", "/bar;par/", NULL, NULL}, -{"filesystem:file:///persistent/bar;par/?query#ref", "file", NULL, NULL, NULL, -1, "/persistent", "/bar;par/", "query", "ref"}, -{"filesystem:file:///persistent", "file", NULL, NULL, NULL, -1, "/persistent", "", NULL, NULL}, -}; - -TEST(URLParser, FileSystemURL) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the construtor. - url_parse::Parsed parsed; - for (size_t i = 0; i < arraysize(filesystem_cases); i++) { - const FileSystemURLParseCase* parsecase = &filesystem_cases[i]; - const char* url = parsecase->input; - url_parse::ParseFileSystemURL(url, static_cast(strlen(url)), &parsed); - - EXPECT_TRUE(ComponentMatches(url, "filesystem", parsed.scheme)); - EXPECT_EQ(!parsecase->inner_scheme, !parsed.inner_parsed()); - // Only check the inner_parsed if there is one. - if (parsed.inner_parsed()) { - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_scheme, - parsed.inner_parsed()->scheme)); - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_username, - parsed.inner_parsed()->username)); - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_password, - parsed.inner_parsed()->password)); - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_host, - parsed.inner_parsed()->host)); - int port = url_parse::ParsePort(url, parsed.inner_parsed()->port); - EXPECT_EQ(parsecase->inner_port, port); - - // The remaining components are never used for filesystem urls. - ExpectInvalidComponent(parsed.inner_parsed()->query); - ExpectInvalidComponent(parsed.inner_parsed()->ref); - } - - EXPECT_TRUE(ComponentMatches(url, parsecase->path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query)); - EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref)); - - // The remaining components are never used for filesystem urls. - ExpectInvalidComponent(parsed.username); - ExpectInvalidComponent(parsed.password); - ExpectInvalidComponent(parsed.host); - ExpectInvalidComponent(parsed.port); - } -} - diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_test_utils.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_test_utils.h.svn-base deleted file mode 100644 index 6278e3fbb..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_test_utils.h.svn-base +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2007 Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Convenience functions for string conversions. -// These are mostly intended for use in unit tests. - -#ifndef GOOGLEURL_SRC_URL_TEST_UTILS_H__ -#define GOOGLEURL_SRC_URL_TEST_UTILS_H__ - -#include - -#include "base/string16.h" -#include "googleurl/src/url_canon_internal.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace url_test_utils { - -// Converts a UTF-16 string from native wchar_t format to char16, by -// truncating the high 32 bits. This is not meant to handle true UTF-32 -// encoded strings. -inline string16 WStringToUTF16(const wchar_t* src) { - string16 str; - int length = static_cast(wcslen(src)); - for (int i = 0; i < length; ++i) { - str.push_back(static_cast(src[i])); - } - return str; -} - -// Converts a string from UTF-8 to UTF-16 -inline string16 ConvertUTF8ToUTF16(const std::string& src) { - int length = static_cast(src.length()); - EXPECT_LT(length, 1024); - url_canon::RawCanonOutputW<1024> output; - EXPECT_TRUE(url_canon::ConvertUTF8ToUTF16(src.data(), length, &output)); - return string16(output.data(), output.length()); -} - -// Converts a string from UTF-16 to UTF-8 -inline std::string ConvertUTF16ToUTF8(const string16& src) { - std::string str; - url_canon::StdStringCanonOutput output(&str); - EXPECT_TRUE(url_canon::ConvertUTF16ToUTF8(src.data(), - static_cast(src.length()), - &output)); - output.Complete(); - return str; -} - -} // namespace url_test_utils - -#endif // GOOGLEURL_SRC_URL_TEST_UTILS_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.cc.svn-base deleted file mode 100644 index 9d621bc20..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.cc.svn-base +++ /dev/null @@ -1,618 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include "googleurl/src/url_util.h" - -#include "base/logging.h" -#include "googleurl/src/url_canon_internal.h" -#include "googleurl/src/url_file.h" -#include "googleurl/src/url_util_internal.h" - -namespace url_util { - -const char kFileScheme[] = "file"; -const char kFileSystemScheme[] = "filesystem"; -const char kMailtoScheme[] = "mailto"; - -namespace { - -// ASCII-specific tolower. The standard library's tolower is locale sensitive, -// so we don't want to use it here. -template inline Char ToLowerASCII(Char c) { - return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; -} - -// Backend for LowerCaseEqualsASCII. -template -inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) { - for (Iter it = a_begin; it != a_end; ++it, ++b) { - if (!*b || ToLowerASCII(*it) != *b) - return false; - } - return *b == 0; -} - -const int kNumStandardURLSchemes = 8; -const char* kStandardURLSchemes[kNumStandardURLSchemes] = { - "http", - "https", - kFileScheme, // Yes, file urls can have a hostname! - "ftp", - "gopher", - "ws", // WebSocket. - "wss", // WebSocket secure. - kFileSystemScheme, -}; - -// List of the currently installed standard schemes. This list is lazily -// initialized by InitStandardSchemes and is leaked on shutdown to prevent -// any destructors from being called that will slow us down or cause problems. -std::vector* standard_schemes = NULL; - -// See the LockStandardSchemes declaration in the header. -bool standard_schemes_locked = false; - -// Ensures that the standard_schemes list is initialized, does nothing if it -// already has values. -void InitStandardSchemes() { - if (standard_schemes) - return; - standard_schemes = new std::vector; - for (int i = 0; i < kNumStandardURLSchemes; i++) - standard_schemes->push_back(kStandardURLSchemes[i]); -} - -// Given a string and a range inside the string, compares it to the given -// lower-case |compare_to| buffer. -template -inline bool DoCompareSchemeComponent(const CHAR* spec, - const url_parse::Component& component, - const char* compare_to) { - if (!component.is_nonempty()) - return compare_to[0] == 0; // When component is empty, match empty scheme. - return LowerCaseEqualsASCII(&spec[component.begin], - &spec[component.end()], - compare_to); -} - -// Returns true if the given scheme identified by |scheme| within |spec| is one -// of the registered "standard" schemes. -template -bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) { - if (!scheme.is_nonempty()) - return false; // Empty or invalid schemes are non-standard. - - InitStandardSchemes(); - for (size_t i = 0; i < standard_schemes->size(); i++) { - if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], - standard_schemes->at(i))) - return true; - } - return false; -} - -template -bool DoFindAndCompareScheme(const CHAR* str, - int str_len, - const char* compare, - url_parse::Component* found_scheme) { - // Before extracting scheme, canonicalize the URL to remove any whitespace. - // This matches the canonicalization done in DoCanonicalize function. - url_canon::RawCanonOutputT whitespace_buffer; - int spec_len; - const CHAR* spec = RemoveURLWhitespace(str, str_len, - &whitespace_buffer, &spec_len); - - url_parse::Component our_scheme; - if (!url_parse::ExtractScheme(spec, spec_len, &our_scheme)) { - // No scheme. - if (found_scheme) - *found_scheme = url_parse::Component(); - return false; - } - if (found_scheme) - *found_scheme = our_scheme; - return DoCompareSchemeComponent(spec, our_scheme, compare); -} - -template -bool DoCanonicalize(const CHAR* in_spec, int in_spec_len, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed) { - // Remove any whitespace from the middle of the relative URL, possibly - // copying to the new buffer. - url_canon::RawCanonOutputT whitespace_buffer; - int spec_len; - const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, - &whitespace_buffer, &spec_len); - - url_parse::Parsed parsed_input; -#ifdef WIN32 - // For Windows, we allow things that look like absolute Windows paths to be - // fixed up magically to file URLs. This is done for IE compatability. For - // example, this will change "c:/foo" into a file URL rather than treating - // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). - // There is similar logic in url_canon_relative.cc for - // - // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which - // has no meaning as an absolute path name. This is because browsers on Mac - // & Unix don't generally do this, so there is no compatibility reason for - // doing so. - if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || - url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { - url_parse::ParseFileURL(spec, spec_len, &parsed_input); - return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, - charset_converter, - output, output_parsed); - } -#endif - - url_parse::Component scheme; - if (!url_parse::ExtractScheme(spec, spec_len, &scheme)) - return false; - - // This is the parsed version of the input URL, we have to canonicalize it - // before storing it in our object. - bool success; - if (DoCompareSchemeComponent(spec, scheme, kFileScheme)) { - // File URLs are special. - url_parse::ParseFileURL(spec, spec_len, &parsed_input); - success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, - charset_converter, output, - output_parsed); - } else if (DoCompareSchemeComponent(spec, scheme, kFileSystemScheme)) { - // Filesystem URLs are special. - url_parse::ParseFileSystemURL(spec, spec_len, &parsed_input); - success = url_canon::CanonicalizeFileSystemURL(spec, spec_len, - parsed_input, - charset_converter, - output, output_parsed); - - } else if (DoIsStandard(spec, scheme)) { - // All "normal" URLs. - url_parse::ParseStandardURL(spec, spec_len, &parsed_input); - success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, - charset_converter, - output, output_parsed); - - } else if (DoCompareSchemeComponent(spec, scheme, kMailtoScheme)) { - // Mailto are treated like a standard url with only a scheme, path, query - url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); - success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, - output, output_parsed); - - } else { - // "Weird" URLs like data: and javascript: - url_parse::ParsePathURL(spec, spec_len, &parsed_input); - success = url_canon::CanonicalizePathURL(spec, spec_len, parsed_input, - output, output_parsed); - } - return success; -} - -template -bool DoResolveRelative(const char* base_spec, - int base_spec_len, - const url_parse::Parsed& base_parsed, - const CHAR* in_relative, - int in_relative_length, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed) { - // Remove any whitespace from the middle of the relative URL, possibly - // copying to the new buffer. - url_canon::RawCanonOutputT whitespace_buffer; - int relative_length; - const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, - &whitespace_buffer, - &relative_length); - bool base_is_authority_based = false; - bool base_is_hierarchical = false; - if (base_spec && - base_parsed.scheme.is_nonempty()) { - int after_scheme = base_parsed.scheme.end() + 1; // Skip past the colon. - int num_slashes = url_parse::CountConsecutiveSlashes( - base_spec, after_scheme, base_spec_len); - base_is_authority_based = num_slashes > 1; - base_is_hierarchical = num_slashes > 0; - } - - bool standard_base_scheme = - base_parsed.scheme.is_nonempty() && - DoIsStandard(base_spec, base_parsed.scheme); - - bool is_relative; - url_parse::Component relative_component; - if (!url_canon::IsRelativeURL(base_spec, base_parsed, - relative, relative_length, - (base_is_hierarchical || standard_base_scheme), - &is_relative, - &relative_component)) { - // Error resolving. - return false; - } - - // Pretend for a moment that |base_spec| is a standard URL. Normally - // non-standard URLs are treated as PathURLs, but if the base has an - // authority we would like to preserve it. - if (is_relative && base_is_authority_based && !standard_base_scheme) { - url_parse::Parsed base_parsed_authority; - ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority); - if (base_parsed_authority.host.is_nonempty()) { - bool did_resolve_succeed = - url_canon::ResolveRelativeURL(base_spec, base_parsed_authority, - false, relative, - relative_component, charset_converter, - output, output_parsed); - // The output_parsed is incorrect at this point (because it was built - // based on base_parsed_authority instead of base_parsed) and needs to be - // re-created. - ParsePathURL(output->data(), output->length(), output_parsed); - return did_resolve_succeed; - } - } else if (is_relative) { - // Relative, resolve and canonicalize. - bool file_base_scheme = base_parsed.scheme.is_nonempty() && - DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); - return url_canon::ResolveRelativeURL(base_spec, base_parsed, - file_base_scheme, relative, - relative_component, charset_converter, - output, output_parsed); - } - - // Not relative, canonicalize the input. - return DoCanonicalize(relative, relative_length, charset_converter, - output, output_parsed); -} - -template -bool DoReplaceComponents(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - const url_canon::Replacements& replacements, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* out_parsed) { - // If the scheme is overridden, just do a simple string substitution and - // reparse the whole thing. There are lots of edge cases that we really don't - // want to deal with. Like what happens if I replace "http://e:8080/foo" - // with a file. Does it become "file:///E:/8080/foo" where the port number - // becomes part of the path? Parsing that string as a file URL says "yes" - // but almost no sane rule for dealing with the components individually would - // come up with that. - // - // Why allow these crazy cases at all? Programatically, there is almost no - // case for replacing the scheme. The most common case for hitting this is - // in JS when building up a URL using the location object. In this case, the - // JS code expects the string substitution behavior: - // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3 - if (replacements.IsSchemeOverridden()) { - // Canonicalize the new scheme so it is 8-bit and can be concatenated with - // the existing spec. - url_canon::RawCanonOutput<128> scheme_replaced; - url_parse::Component scheme_replaced_parsed; - url_canon::CanonicalizeScheme( - replacements.sources().scheme, - replacements.components().scheme, - &scheme_replaced, &scheme_replaced_parsed); - - // We can assume that the input is canonicalized, which means it always has - // a colon after the scheme (or where the scheme would be). - int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1 - : 1; - if (spec_len - spec_after_colon > 0) { - scheme_replaced.Append(&spec[spec_after_colon], - spec_len - spec_after_colon); - } - - // We now need to completely re-parse the resulting string since its meaning - // may have changed with the different scheme. - url_canon::RawCanonOutput<128> recanonicalized; - url_parse::Parsed recanonicalized_parsed; - DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), - charset_converter, - &recanonicalized, &recanonicalized_parsed); - - // Recurse using the version with the scheme already replaced. This will now - // use the replacement rules for the new scheme. - // - // Warning: this code assumes that ReplaceComponents will re-check all - // components for validity. This is because we can't fail if DoCanonicalize - // failed above since theoretically the thing making it fail could be - // getting replaced here. If ReplaceComponents didn't re-check everything, - // we wouldn't know if something *not* getting replaced is a problem. - // If the scheme-specific replacers are made more intelligent so they don't - // re-check everything, we should instead recanonicalize the whole thing - // after this call to check validity (this assumes replacing the scheme is - // much much less common than other types of replacements, like clearing the - // ref). - url_canon::Replacements replacements_no_scheme = replacements; - replacements_no_scheme.SetScheme(NULL, url_parse::Component()); - return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(), - recanonicalized_parsed, replacements_no_scheme, - charset_converter, output, out_parsed); - } - - // If we get here, then we know the scheme doesn't need to be replaced, so can - // just key off the scheme in the spec to know how to do the replacements. - if (DoCompareSchemeComponent(spec, parsed.scheme, kFileScheme)) { - return url_canon::ReplaceFileURL(spec, parsed, replacements, - charset_converter, output, out_parsed); - } - if (DoCompareSchemeComponent(spec, parsed.scheme, kFileSystemScheme)) { - return url_canon::ReplaceFileSystemURL(spec, parsed, replacements, - charset_converter, output, - out_parsed); - } - if (DoIsStandard(spec, parsed.scheme)) { - return url_canon::ReplaceStandardURL(spec, parsed, replacements, - charset_converter, output, out_parsed); - } - if (DoCompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) { - return url_canon::ReplaceMailtoURL(spec, parsed, replacements, - output, out_parsed); - } - - // Default is a path URL. - return url_canon::ReplacePathURL(spec, parsed, replacements, - output, out_parsed); -} - -} // namespace - -void Initialize() { - InitStandardSchemes(); -} - -void Shutdown() { - if (standard_schemes) { - delete standard_schemes; - standard_schemes = NULL; - } -} - -void AddStandardScheme(const char* new_scheme) { - // If this assert triggers, it means you've called AddStandardScheme after - // LockStandardSchemes have been called (see the header file for - // LockStandardSchemes for more). - // - // This normally means you're trying to set up a new standard scheme too late - // in your application's init process. Locate where your app does this - // initialization and calls LockStandardScheme, and add your new standard - // scheme there. - DCHECK(!standard_schemes_locked) << - "Trying to add a standard scheme after the list has been locked."; - - size_t scheme_len = strlen(new_scheme); - if (scheme_len == 0) - return; - - // Dulicate the scheme into a new buffer and add it to the list of standard - // schemes. This pointer will be leaked on shutdown. - char* dup_scheme = new char[scheme_len + 1]; - memcpy(dup_scheme, new_scheme, scheme_len + 1); - - InitStandardSchemes(); - standard_schemes->push_back(dup_scheme); -} - -void LockStandardSchemes() { - standard_schemes_locked = true; -} - -bool IsStandard(const char* spec, const url_parse::Component& scheme) { - return DoIsStandard(spec, scheme); -} - -bool IsStandard(const char16* spec, const url_parse::Component& scheme) { - return DoIsStandard(spec, scheme); -} - -bool FindAndCompareScheme(const char* str, - int str_len, - const char* compare, - url_parse::Component* found_scheme) { - return DoFindAndCompareScheme(str, str_len, compare, found_scheme); -} - -bool FindAndCompareScheme(const char16* str, - int str_len, - const char* compare, - url_parse::Component* found_scheme) { - return DoFindAndCompareScheme(str, str_len, compare, found_scheme); -} - -bool Canonicalize(const char* spec, - int spec_len, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed) { - return DoCanonicalize(spec, spec_len, charset_converter, - output, output_parsed); -} - -bool Canonicalize(const char16* spec, - int spec_len, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed) { - return DoCanonicalize(spec, spec_len, charset_converter, - output, output_parsed); -} - -bool ResolveRelative(const char* base_spec, - int base_spec_len, - const url_parse::Parsed& base_parsed, - const char* relative, - int relative_length, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed) { - return DoResolveRelative(base_spec, base_spec_len, base_parsed, - relative, relative_length, - charset_converter, output, output_parsed); -} - -bool ResolveRelative(const char* base_spec, - int base_spec_len, - const url_parse::Parsed& base_parsed, - const char16* relative, - int relative_length, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed) { - return DoResolveRelative(base_spec, base_spec_len, base_parsed, - relative, relative_length, - charset_converter, output, output_parsed); -} - -bool ReplaceComponents(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - const url_canon::Replacements& replacements, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* out_parsed) { - return DoReplaceComponents(spec, spec_len, parsed, replacements, - charset_converter, output, out_parsed); -} - -bool ReplaceComponents(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - const url_canon::Replacements& replacements, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* out_parsed) { - return DoReplaceComponents(spec, spec_len, parsed, replacements, - charset_converter, output, out_parsed); -} - -// Front-ends for LowerCaseEqualsASCII. -bool LowerCaseEqualsASCII(const char* a_begin, - const char* a_end, - const char* b) { - return DoLowerCaseEqualsASCII(a_begin, a_end, b); -} - -bool LowerCaseEqualsASCII(const char* a_begin, - const char* a_end, - const char* b_begin, - const char* b_end) { - while (a_begin != a_end && b_begin != b_end && - ToLowerASCII(*a_begin) == *b_begin) { - a_begin++; - b_begin++; - } - return a_begin == a_end && b_begin == b_end; -} - -bool LowerCaseEqualsASCII(const char16* a_begin, - const char16* a_end, - const char* b) { - return DoLowerCaseEqualsASCII(a_begin, a_end, b); -} - -void DecodeURLEscapeSequences(const char* input, int length, - url_canon::CanonOutputW* output) { - url_canon::RawCanonOutputT unescaped_chars; - for (int i = 0; i < length; i++) { - if (input[i] == '%') { - unsigned char ch; - if (url_canon::DecodeEscaped(input, &i, length, &ch)) { - unescaped_chars.push_back(ch); - } else { - // Invalid escape sequence, copy the percent literal. - unescaped_chars.push_back('%'); - } - } else { - // Regular non-escaped 8-bit character. - unescaped_chars.push_back(input[i]); - } - } - - // Convert that 8-bit to UTF-16. It's not clear IE does this at all to - // JavaScript URLs, but Firefox and Safari do. - for (int i = 0; i < unescaped_chars.length(); i++) { - unsigned char uch = static_cast(unescaped_chars.at(i)); - if (uch < 0x80) { - // Non-UTF-8, just append directly - output->push_back(uch); - } else { - // next_ch will point to the last character of the decoded - // character. - int next_character = i; - unsigned code_point; - if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character, - unescaped_chars.length(), &code_point)) { - // Valid UTF-8 character, convert to UTF-16. - url_canon::AppendUTF16Value(code_point, output); - i = next_character; - } else { - // If there are any sequences that are not valid UTF-8, we keep - // invalid code points and promote to UTF-16. We copy all characters - // from the current position to the end of the identified sequence. - while (i < next_character) { - output->push_back(static_cast(unescaped_chars.at(i))); - i++; - } - output->push_back(static_cast(unescaped_chars.at(i))); - } - } - } -} - -void EncodeURIComponent(const char* input, int length, - url_canon::CanonOutput* output) { - for (int i = 0; i < length; ++i) { - unsigned char c = static_cast(input[i]); - if (url_canon::IsComponentChar(c)) - output->push_back(c); - else - AppendEscapedChar(c, output); - } -} - -bool CompareSchemeComponent(const char* spec, - const url_parse::Component& component, - const char* compare_to) { - return DoCompareSchemeComponent(spec, component, compare_to); -} - -bool CompareSchemeComponent(const char16* spec, - const url_parse::Component& component, - const char* compare_to) { - return DoCompareSchemeComponent(spec, component, compare_to); -} - -} // namespace url_util diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.h.svn-base deleted file mode 100644 index 9e53d2d32..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util.h.svn-base +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLEURL_SRC_URL_UTIL_H__ -#define GOOGLEURL_SRC_URL_UTIL_H__ - -#include - -#include "base/string16.h" -#include "googleurl/src/url_common.h" -#include "googleurl/src/url_parse.h" -#include "googleurl/src/url_canon.h" - -namespace url_util { - -// Init ------------------------------------------------------------------------ - -// Initialization is NOT required, it will be implicitly initialized when first -// used. However, this implicit initialization is NOT threadsafe. If you are -// using this library in a threaded environment and don't have a consistent -// "first call" (an example might be calling "AddStandardScheme" with your -// special application-specific schemes) then you will want to call initialize -// before spawning any threads. -// -// It is OK to call this function more than once, subsequent calls will simply -// "noop", unless Shutdown() was called in the mean time. This will also be a -// "noop" if other calls to the library have forced an initialization -// beforehand. -GURL_API void Initialize(); - -// Cleanup is not required, except some strings may leak. For most user -// applications, this is fine. If you're using it in a library that may get -// loaded and unloaded, you'll want to unload to properly clean up your -// library. -GURL_API void Shutdown(); - -// Schemes -------------------------------------------------------------------- - -// Adds an application-defined scheme to the internal list of "standard" URL -// schemes. This function is not threadsafe and can not be called concurrently -// with any other url_util function. It will assert if the list of standard -// schemes has been locked (see LockStandardSchemes). -GURL_API void AddStandardScheme(const char* new_scheme); - -// Sets a flag to prevent future calls to AddStandardScheme from succeeding. -// -// This is designed to help prevent errors for multithreaded applications. -// Normal usage would be to call AddStandardScheme for your custom schemes at -// the beginning of program initialization, and then LockStandardSchemes. This -// prevents future callers from mistakenly calling AddStandardScheme when the -// program is running with multiple threads, where such usage would be -// dangerous. -// -// We could have had AddStandardScheme use a lock instead, but that would add -// some platform-specific dependencies we don't otherwise have now, and is -// overkill considering the normal usage is so simple. -GURL_API void LockStandardSchemes(); - -// Locates the scheme in the given string and places it into |found_scheme|, -// which may be NULL to indicate the caller does not care about the range. -// -// Returns whether the given |compare| scheme matches the scheme found in the -// input (if any). The |compare| scheme must be a valid canonical scheme or -// the result of the comparison is undefined. -GURL_API bool FindAndCompareScheme(const char* str, - int str_len, - const char* compare, - url_parse::Component* found_scheme); -GURL_API bool FindAndCompareScheme(const char16* str, - int str_len, - const char* compare, - url_parse::Component* found_scheme); -inline bool FindAndCompareScheme(const std::string& str, - const char* compare, - url_parse::Component* found_scheme) { - return FindAndCompareScheme(str.data(), static_cast(str.size()), - compare, found_scheme); -} -inline bool FindAndCompareScheme(const string16& str, - const char* compare, - url_parse::Component* found_scheme) { - return FindAndCompareScheme(str.data(), static_cast(str.size()), - compare, found_scheme); -} - -// Returns true if the given string represents a standard URL. This means that -// either the scheme is in the list of known standard schemes. -GURL_API bool IsStandard(const char* spec, - const url_parse::Component& scheme); -GURL_API bool IsStandard(const char16* spec, - const url_parse::Component& scheme); - -// TODO(brettw) remove this. This is a temporary compatibility hack to avoid -// breaking the WebKit build when this version is synced via Chrome. -inline bool IsStandard(const char* spec, int spec_len, - const url_parse::Component& scheme) { - return IsStandard(spec, scheme); -} - -// URL library wrappers ------------------------------------------------------- - -// Parses the given spec according to the extracted scheme type. Normal users -// should use the URL object, although this may be useful if performance is -// critical and you don't want to do the heap allocation for the std::string. -// -// As with the url_canon::Canonicalize* functions, the charset converter can -// be NULL to use UTF-8 (it will be faster in this case). -// -// Returns true if a valid URL was produced, false if not. On failure, the -// output and parsed structures will still be filled and will be consistent, -// but they will not represent a loadable URL. -GURL_API bool Canonicalize(const char* spec, - int spec_len, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed); -GURL_API bool Canonicalize(const char16* spec, - int spec_len, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed); - -// Resolves a potentially relative URL relative to the given parsed base URL. -// The base MUST be valid. The resulting canonical URL and parsed information -// will be placed in to the given out variables. -// -// The relative need not be relative. If we discover that it's absolute, this -// will produce a canonical version of that URL. See Canonicalize() for more -// about the charset_converter. -// -// Returns true if the output is valid, false if the input could not produce -// a valid URL. -GURL_API bool ResolveRelative(const char* base_spec, - int base_spec_len, - const url_parse::Parsed& base_parsed, - const char* relative, - int relative_length, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed); -GURL_API bool ResolveRelative(const char* base_spec, - int base_spec_len, - const url_parse::Parsed& base_parsed, - const char16* relative, - int relative_length, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* output_parsed); - -// Replaces components in the given VALID input url. The new canonical URL info -// is written to output and out_parsed. -// -// Returns true if the resulting URL is valid. -GURL_API bool ReplaceComponents( - const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - const url_canon::Replacements& replacements, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* out_parsed); -GURL_API bool ReplaceComponents( - const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - const url_canon::Replacements& replacements, - url_canon::CharsetConverter* charset_converter, - url_canon::CanonOutput* output, - url_parse::Parsed* out_parsed); - -// String helper functions ---------------------------------------------------- - -// Compare the lower-case form of the given string against the given ASCII -// string. This is useful for doing checking if an input string matches some -// token, and it is optimized to avoid intermediate string copies. -// -// The versions of this function that don't take a b_end assume that the b -// string is NULL terminated. -GURL_API bool LowerCaseEqualsASCII(const char* a_begin, - const char* a_end, - const char* b); -GURL_API bool LowerCaseEqualsASCII(const char* a_begin, - const char* a_end, - const char* b_begin, - const char* b_end); -GURL_API bool LowerCaseEqualsASCII(const char16* a_begin, - const char16* a_end, - const char* b); - -// Unescapes the given string using URL escaping rules. -GURL_API void DecodeURLEscapeSequences(const char* input, int length, - url_canon::CanonOutputW* output); - -// Escapes the given string as defined by the JS method encodeURIComponent. See -// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent -GURL_API void EncodeURIComponent(const char* input, int length, - url_canon::CanonOutput* output); - - -} // namespace url_util - -#endif // GOOGLEURL_SRC_URL_UTIL_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_internal.h.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_internal.h.svn-base deleted file mode 100644 index 1fbb46aa2..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_internal.h.svn-base +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2011, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__ -#define GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__ - -#include - -#include "base/string16.h" -#include "googleurl/src/url_common.h" -#include "googleurl/src/url_parse.h" - -namespace url_util { - -extern const char kFileScheme[]; -extern const char kFileSystemScheme[]; -extern const char kMailtoScheme[]; - -// Given a string and a range inside the string, compares it to the given -// lower-case |compare_to| buffer. -bool CompareSchemeComponent(const char* spec, - const url_parse::Component& component, - const char* compare_to); -bool CompareSchemeComponent(const char16* spec, - const url_parse::Component& component, - const char* compare_to); - -} // namespace url_util - -#endif // GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__ diff --git a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_unittest.cc.svn-base b/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_unittest.cc.svn-base deleted file mode 100644 index c7b39fecb..000000000 --- a/ePub3/ThirdParty/google-url/src/.svn/text-base/url_util_unittest.cc.svn-base +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "googleurl/src/url_canon.h" -#include "googleurl/src/url_canon_stdstring.h" -#include "googleurl/src/url_parse.h" -#include "googleurl/src/url_test_utils.h" -#include "googleurl/src/url_util.h" -#include "testing/gtest/include/gtest/gtest.h" - -TEST(URLUtilTest, FindAndCompareScheme) { - url_parse::Component found_scheme; - - // Simple case where the scheme is found and matches. - const char kStr1[] = "http://www.com/"; - EXPECT_TRUE(url_util::FindAndCompareScheme( - kStr1, static_cast(strlen(kStr1)), "http", NULL)); - EXPECT_TRUE(url_util::FindAndCompareScheme( - kStr1, static_cast(strlen(kStr1)), "http", &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component(0, 4)); - - // A case where the scheme is found and doesn't match. - EXPECT_FALSE(url_util::FindAndCompareScheme( - kStr1, static_cast(strlen(kStr1)), "https", &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component(0, 4)); - - // A case where there is no scheme. - const char kStr2[] = "httpfoobar"; - EXPECT_FALSE(url_util::FindAndCompareScheme( - kStr2, static_cast(strlen(kStr2)), "http", &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component()); - - // When there is an empty scheme, it should match the empty scheme. - const char kStr3[] = ":foo.com/"; - EXPECT_TRUE(url_util::FindAndCompareScheme( - kStr3, static_cast(strlen(kStr3)), "", &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component(0, 0)); - - // But when there is no scheme, it should fail. - EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component()); - - // When there is a whitespace char in scheme, it should canonicalize the url - // before comparison. - const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; - EXPECT_TRUE(url_util::FindAndCompareScheme( - whtspc_str, static_cast(strlen(whtspc_str)), "javascript", - &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); - - // Control characters should be stripped out on the ends, and kept in the - // middle. - const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; - EXPECT_FALSE(url_util::FindAndCompareScheme( - ctrl_str, static_cast(strlen(ctrl_str)), "javascript", - &found_scheme)); - EXPECT_TRUE(found_scheme == url_parse::Component(1, 11)); -} - -TEST(URLUtilTest, ReplaceComponents) { - url_parse::Parsed parsed; - url_canon::RawCanonOutputT output; - url_parse::Parsed new_parsed; - - // Check that the following calls do not cause crash - url_canon::Replacements replacements; - replacements.SetRef("test", url_parse::Component(0, 4)); - url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, - &new_parsed); - url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, - &new_parsed); - replacements.ClearRef(); - replacements.SetHost("test", url_parse::Component(0, 4)); - url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, - &new_parsed); - url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, - &new_parsed); - - replacements.ClearHost(); - url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, - &new_parsed); - url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, - &new_parsed); - url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, - &new_parsed); - url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, - &new_parsed); -} - -static std::string CheckReplaceScheme(const char* base_url, - const char* scheme) { - // Make sure the input is canonicalized. - url_canon::RawCanonOutput<32> original; - url_parse::Parsed original_parsed; - url_util::Canonicalize(base_url, strlen(base_url), NULL, - &original, &original_parsed); - - url_canon::Replacements replacements; - replacements.SetScheme(scheme, url_parse::Component(0, strlen(scheme))); - - std::string output_string; - url_canon::StdStringCanonOutput output(&output_string); - url_parse::Parsed output_parsed; - url_util::ReplaceComponents(original.data(), original.length(), - original_parsed, replacements, NULL, - &output, &output_parsed); - - output.Complete(); - return output_string; -} - -TEST(URLUtilTest, ReplaceScheme) { - EXPECT_EQ("https://google.com/", - CheckReplaceScheme("http://google.com/", "https")); - EXPECT_EQ("file://google.com/", - CheckReplaceScheme("http://google.com/", "file")); - EXPECT_EQ("http://home/Build", - CheckReplaceScheme("file:///Home/Build", "http")); - EXPECT_EQ("javascript:foo", - CheckReplaceScheme("about:foo", "javascript")); - EXPECT_EQ("://google.com/", - CheckReplaceScheme("http://google.com/", "")); - EXPECT_EQ("http://google.com/", - CheckReplaceScheme("about:google.com", "http")); - EXPECT_EQ("http:", CheckReplaceScheme("", "http")); - -#ifdef WIN32 - // Magic Windows drive letter behavior when converting to a file URL. - EXPECT_EQ("file:///E:/foo/", - CheckReplaceScheme("http://localhost/e:foo/", "file")); -#endif - - // This will probably change to "about://google.com/" when we fix - // http://crbug.com/160 which should also be an acceptable result. - EXPECT_EQ("about://google.com/", - CheckReplaceScheme("http://google.com/", "about")); -} - -TEST(URLUtilTest, DecodeURLEscapeSequences) { - struct DecodeCase { - const char* input; - const char* output; - } decode_cases[] = { - {"hello, world", "hello, world"}, - {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", - "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"}, - {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"}, - {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", - " !\"#$%&'()*+,-.//"}, - {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", - "0123456789:;<=>?/"}, - {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", - "@ABCDEFGHIJKLMNO/"}, - {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", - "PQRSTUVWXYZ[\\]^_/"}, - {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", - "`abcdefghijklmno/"}, - {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", - "pqrstuvwxyz{|}~\x7f/"}, - // Test un-UTF-8-ization. - {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"}, - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) { - const char* input = decode_cases[i].input; - url_canon::RawCanonOutputT output; - url_util::DecodeURLEscapeSequences(input, strlen(input), &output); - EXPECT_EQ(decode_cases[i].output, - url_test_utils::ConvertUTF16ToUTF8( - string16(output.data(), output.length()))); - } - - // Our decode should decode %00 - const char zero_input[] = "%00"; - url_canon::RawCanonOutputT zero_output; - url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input), - &zero_output); - EXPECT_NE("%00", - url_test_utils::ConvertUTF16ToUTF8( - string16(zero_output.data(), zero_output.length()))); - - // Test the error behavior for invalid UTF-8. - const char invalid_input[] = "%e4%a0%e5%a5%bd"; - const char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; - url_canon::RawCanonOutputT invalid_output; - url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), - &invalid_output); - EXPECT_EQ(string16(invalid_expected), - string16(invalid_output.data(), invalid_output.length())); -} - -TEST(URLUtilTest, TestEncodeURIComponent) { - struct EncodeCase { - const char* input; - const char* output; - } encode_cases[] = { - {"hello, world", "hello%2C%20world"}, - {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", - "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"}, - {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", - "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"}, - {" !\"#$%&'()*+,-./", - "%20!%22%23%24%25%26'()*%2B%2C-.%2F"}, - {"0123456789:;<=>?", - "0123456789%3A%3B%3C%3D%3E%3F"}, - {"@ABCDEFGHIJKLMNO", - "%40ABCDEFGHIJKLMNO"}, - {"PQRSTUVWXYZ[\\]^_", - "PQRSTUVWXYZ%5B%5C%5D%5E_"}, - {"`abcdefghijklmno", - "%60abcdefghijklmno"}, - {"pqrstuvwxyz{|}~\x7f", - "pqrstuvwxyz%7B%7C%7D~%7F"}, - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(encode_cases); i++) { - const char* input = encode_cases[i].input; - url_canon::RawCanonOutputT buffer; - url_util::EncodeURIComponent(input, strlen(input), &buffer); - std::string output(buffer.data(), buffer.length()); - EXPECT_EQ(encode_cases[i].output, output); - } -} - -TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) { - // This tests non-standard (in the sense that GURL::IsStandard() == false) - // hierarchical schemes. - struct ResolveRelativeCase { - const char* base; - const char* rel; - bool is_valid; - const char* out; - } resolve_non_standard_cases[] = { - // Resolving a relative path against a non-hierarchical URL should fail. - {"scheme:opaque_data", "/path", false, ""}, - // Resolving a relative path against a non-standard authority-based base - // URL doesn't alter the authority section. - {"scheme://Authority/", "../path", true, "scheme://Authority/path"}, - // A non-standard hierarchical base is resolved with path URL - // canoncialization rules. - {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"}, - {"data:/Path/../part/part2", "file.html", true, "data:/Path/../part/file.html"}, - // Path URL canonicalization rules also apply to non-standard authority- - // based URLs. - {"custom://Authority/", "file.html", true, "custom://Authority/file.html"}, - {"custom://Authority/", "other://Auth/", true, "other://Auth/"}, - {"custom://Authority/", "../../file.html", true, "custom://Authority/file.html"}, - {"custom://Authority/path/", "file.html", true, "custom://Authority/path/file.html"}, - {"custom://Authority:NoCanon/path/", "file.html", true, "custom://Authority:NoCanon/path/file.html"}, - // It's still possible to get an invalid path URL. - {"custom://Invalid:!#Auth/", "file.html", false, ""}, - // A path with an authority section gets canonicalized under standard URL - // rules, even though the base was non-standard. - {"content://content.Provider/", "//other.Provider", true, "content://other.provider/"}, - // Resolving an absolute URL doesn't cause canonicalization of the - // result. - {"about:blank", "custom://Authority", true, "custom://Authority"}, - // Resolving should fail if the base URL is authority-based but is - // missing a path component (the '/' at the end). - {"scheme://Authority", "path", false, ""}, - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(resolve_non_standard_cases); i++) { - const ResolveRelativeCase& test_data = resolve_non_standard_cases[i]; - url_parse::Parsed base_parsed; - url_parse::ParsePathURL(test_data.base, strlen(test_data.base), - &base_parsed); - - std::string resolved; - url_canon::StdStringCanonOutput output(&resolved); - url_parse::Parsed resolved_parsed; - bool valid = - url_util::ResolveRelative(test_data.base, strlen(test_data.base), - base_parsed, - test_data.rel, strlen(test_data.rel), - NULL, &output, &resolved_parsed); - output.Complete(); - - EXPECT_EQ(test_data.is_valid, valid) << i; - if (test_data.is_valid && valid) - EXPECT_EQ(test_data.out, resolved) << i; - } -} diff --git a/ePub3/ThirdParty/google-url/third_party/.svn/all-wcprops b/ePub3/ThirdParty/google-url/third_party/.svn/all-wcprops deleted file mode 100644 index 2992d61b3..000000000 --- a/ePub3/ThirdParty/google-url/third_party/.svn/all-wcprops +++ /dev/null @@ -1,5 +0,0 @@ -K 25 -svn:wc:ra_dav:version-url -V 35 -/svn/!svn/ver/117/trunk/third_party -END diff --git a/ePub3/ThirdParty/google-url/third_party/.svn/entries b/ePub3/ThirdParty/google-url/third_party/.svn/entries deleted file mode 100644 index 3fefc889f..000000000 --- a/ePub3/ThirdParty/google-url/third_party/.svn/entries +++ /dev/null @@ -1,31 +0,0 @@ -10 - -dir -181 -http://google-url.googlecode.com/svn/trunk/third_party -http://google-url.googlecode.com/svn - - - -2009-09-18T12:10:33.960477Z -117 -maruel@chromium.org - - - - - - - - - - - - - - -8873c55e-713a-0410-88f8-23d9c3d90b1b - -icu -dir - From a830939d2d8f1421fc5fd72d28f8850d31ebdca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Menu?= Date: Mon, 12 Sep 2016 16:56:22 +0200 Subject: [PATCH 2/6] Expose publication's publisher --- Platform/Android/epub3/src/main/jni/packagejni.cpp | 6 ++++++ .../src/main/java/org/readium/sdk/android/Package.java | 8 ++++++++ Platform/Apple/RDServices/Main/RDPackage.h | 1 + Platform/Apple/RDServices/Main/RDPackage.mm | 4 ++++ ePub3/ePub/package.cpp | 7 +++++++ ePub3/ePub/package.h | 7 +++++++ 6 files changed, 33 insertions(+) diff --git a/Platform/Android/epub3/src/main/jni/packagejni.cpp b/Platform/Android/epub3/src/main/jni/packagejni.cpp index 4bfed2040..48eb95bde 100644 --- a/Platform/Android/epub3/src/main/jni/packagejni.cpp +++ b/Platform/Android/epub3/src/main/jni/packagejni.cpp @@ -524,6 +524,12 @@ JNIEXPORT jstring JNICALL Java_org_readium_sdk_android_Package_nativeGetISBN jni::StringUTF str(env, (std::string&) PCKG(pckgPtr)->ISBN().stl_str()); return (jstring) str; } +JNIEXPORT jstring JNICALL Java_org_readium_sdk_android_Package_nativeGetPublisher + (JNIEnv* env, jobject thiz, jlong pckgPtr) +{ + jni::StringUTF str(env, (std::string&) PCKG(pckgPtr)->Publisher().stl_str()); + return (jstring) str; +} JNIEXPORT jstring JNICALL Java_org_readium_sdk_android_Package_nativeGetLanguage (JNIEnv* env, jobject thiz, jlong pckgPtr) { diff --git a/Platform/Android/lib/src/main/java/org/readium/sdk/android/Package.java b/Platform/Android/lib/src/main/java/org/readium/sdk/android/Package.java index 4445922eb..dbfc92cfe 100644 --- a/Platform/Android/lib/src/main/java/org/readium/sdk/android/Package.java +++ b/Platform/Android/lib/src/main/java/org/readium/sdk/android/Package.java @@ -68,6 +68,7 @@ public class Package { private String type; private String version; private String isbn; + private String publisher; private String language; private String copyrightOwner; private String source; @@ -155,6 +156,7 @@ private void loadData() { type = nativeGetType(__nativePtr); version = nativeGetVersion(__nativePtr); isbn = nativeGetISBN(__nativePtr); + publisher = nativeGetPublisher(__nativePtr); language = nativeGetLanguage(__nativePtr); copyrightOwner = nativeGetCopyrightOwner(__nativePtr); source = nativeGetSource(__nativePtr); @@ -186,6 +188,7 @@ private void loadData() { Log.i(TAG, "type: "+type); Log.i(TAG, "version: "+version); Log.i(TAG, "isbn: "+isbn); + Log.i(TAG, "publisher:" +publisher); Log.i(TAG, "language: "+language); Log.i(TAG, "copyrightOwner: "+copyrightOwner); Log.i(TAG, "source: "+source); @@ -263,6 +266,10 @@ public String getIsbn() { return isbn; } + public String getPublisher() { + return publisher; + } + public String getLanguage() { return language; } @@ -513,6 +520,7 @@ public JSONObject toJSON() { private native String nativeGetType(long nativePtr); private native String nativeGetVersion(long nativePtr); private native String nativeGetISBN(long nativePtr); + private native String nativeGetPublisher(long nativePtr); private native String nativeGetLanguage(long nativePtr); private native String nativeGetCopyrightOwner(long nativePtr); private native String nativeGetSource(long nativePtr); diff --git a/Platform/Apple/RDServices/Main/RDPackage.h b/Platform/Apple/RDServices/Main/RDPackage.h index 404b4a9c1..03bb97929 100644 --- a/Platform/Apple/RDServices/Main/RDPackage.h +++ b/Platform/Apple/RDServices/Main/RDPackage.h @@ -41,6 +41,7 @@ @property (nonatomic, readonly) NSDictionary *dictionary; @property (nonatomic, readonly) NSString *fullTitle; @property (nonatomic, readonly) NSString *isbn; +@property (nonatomic, readonly) NSString *publisher; @property (nonatomic, readonly) NSString *language; @property (nonatomic, readonly) RDNavigationElement *listOfFigures; @property (nonatomic, readonly) RDNavigationElement *listOfIllustrations; diff --git a/Platform/Apple/RDServices/Main/RDPackage.mm b/Platform/Apple/RDServices/Main/RDPackage.mm index 085e3b42f..b4b9da680 100644 --- a/Platform/Apple/RDServices/Main/RDPackage.mm +++ b/Platform/Apple/RDServices/Main/RDPackage.mm @@ -193,6 +193,10 @@ - (NSString *)isbn { return [NSString stringWithUTF8String:s.c_str()]; } +- (NSString *)publisher { + const ePub3::string s = m_package->Publisher(); + return [NSString stringWithUTF8String:s.c_str()]; +} - (NSString *)language { const ePub3::string s = m_package->Language(); diff --git a/ePub3/ePub/package.cpp b/ePub3/ePub/package.cpp index 6a8f38173..dc7101f90 100644 --- a/ePub3/ePub/package.cpp +++ b/ePub3/ePub/package.cpp @@ -1587,6 +1587,13 @@ const string Package::Contributors(bool localized) const ss << "and " << *last; return string(ss.str()); } +const string& Package::Publisher() const +{ + auto items = PropertiesMatching(DCType::Publisher); + if ( items.empty() ) + return string::EmptyString; + return items[0]->Value(); +} const string& Package::Language() const { auto items = PropertiesMatching(DCType::Language); diff --git a/ePub3/ePub/package.h b/ePub3/ePub/package.h index 2d24a05e9..718d1acf3 100644 --- a/ePub3/ePub/package.h +++ b/ePub3/ePub/package.h @@ -717,6 +717,13 @@ class Package : public PackageBase, public PointerType, public Property EPUB3_EXPORT const string Contributors(bool localized=true) const; + /** + Retrieves the publisher of the publication, if available. + @result The publication's publisher. + */ + EPUB3_EXPORT + const string& Publisher() const; + /** Retrieves the language of the publication, if available. @result The publication's original language. From aa83bd8acae6fc3962391091d76190e4d55e6a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ko=CC=88rner?= Date: Tue, 20 Sep 2016 18:11:24 +0200 Subject: [PATCH 3/6] Fix #257 Xcode 8 compile errors with C++ functions --- .../Apple/ePub3.xcodeproj/project.pbxproj | 41 ++++++++++++++++--- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/Platform/Apple/ePub3.xcodeproj/project.pbxproj b/Platform/Apple/ePub3.xcodeproj/project.pbxproj index ba89ba550..efd515ed0 100644 --- a/Platform/Apple/ePub3.xcodeproj/project.pbxproj +++ b/Platform/Apple/ePub3.xcodeproj/project.pbxproj @@ -1944,7 +1944,7 @@ ABA72C1B1655382E003125FF /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 0510; + LastUpgradeCheck = 0800; ORGANIZATIONNAME = "The Readium Foundation and contributors"; }; buildConfigurationList = ABA72C1E1655382E003125FF /* Build configuration list for PBXProject "ePub3" */; @@ -2358,7 +2358,7 @@ /usr/include/libxml2, "$(SRCROOT)/include", ); - IPHONEOS_DEPLOYMENT_TARGET = 6.0; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; LIBRARY_SEARCH_PATHS = ""; OTHER_LDFLAGS = "-ObjC"; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -2381,7 +2381,7 @@ /usr/include/libxml2, "$(SRCROOT)/include", ); - IPHONEOS_DEPLOYMENT_TARGET = 6.0; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; LIBRARY_SEARCH_PATHS = ""; OTHER_LDFLAGS = "-ObjC"; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -2395,14 +2395,24 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; GCC_PREPROCESSOR_DEFINITIONS = ( "BUILDING_EPUB3=1", @@ -2412,10 +2422,16 @@ GCC_SYMBOLS_PRIVATE_EXTERN = NO; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; MACOSX_DEPLOYMENT_TARGET = 10.8; ONLY_ACTIVE_ARCH = YES; + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-Wno-inconsistent-missing-override", + ); SDKROOT = macosx; }; name = Debug; @@ -2424,23 +2440,38 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_STRICT_OBJC_MSGSEND = YES; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_NO_COMMON_BLOCKS = YES; GCC_PREPROCESSOR_DEFINITIONS = ( "BUILDING_EPUB3=1", "NDEBUG=1", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; MACOSX_DEPLOYMENT_TARGET = 10.8; + OTHER_CPLUSPLUSFLAGS = ( + "$(OTHER_CFLAGS)", + "-Wno-inconsistent-missing-override", + ); SDKROOT = macosx; }; name = Release; From 55ded556a2e3b114edb70f5ce3e743379549a154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ko=CC=88rner?= Date: Tue, 20 Sep 2016 19:46:25 +0200 Subject: [PATCH 4/6] Update macOS deployment target to 10.9 --- Platform/Apple/ePub3.xcodeproj/project.pbxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Platform/Apple/ePub3.xcodeproj/project.pbxproj b/Platform/Apple/ePub3.xcodeproj/project.pbxproj index efd515ed0..5b4a69281 100644 --- a/Platform/Apple/ePub3.xcodeproj/project.pbxproj +++ b/Platform/Apple/ePub3.xcodeproj/project.pbxproj @@ -2426,7 +2426,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 10.8; + MACOSX_DEPLOYMENT_TARGET = 10.9; ONLY_ACTIVE_ARCH = YES; OTHER_CPLUSPLUSFLAGS = ( "$(OTHER_CFLAGS)", @@ -2467,7 +2467,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 10.8; + MACOSX_DEPLOYMENT_TARGET = 10.9; OTHER_CPLUSPLUSFLAGS = ( "$(OTHER_CFLAGS)", "-Wno-inconsistent-missing-override", From d790ecfbdfa62f97bbf730d76aa746433c98ba16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ko=CC=88rner?= Date: Mon, 5 Dec 2016 18:08:48 +0100 Subject: [PATCH 5/6] Disable TLS in Future on iOS --- Platform/Apple/ePub3.xcodeproj/project.pbxproj | 2 ++ ePub3/utilities/future.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Platform/Apple/ePub3.xcodeproj/project.pbxproj b/Platform/Apple/ePub3.xcodeproj/project.pbxproj index 5b4a69281..d9f020dde 100644 --- a/Platform/Apple/ePub3.xcodeproj/project.pbxproj +++ b/Platform/Apple/ePub3.xcodeproj/project.pbxproj @@ -2433,6 +2433,7 @@ "-Wno-inconsistent-missing-override", ); SDKROOT = macosx; + STRIP_INSTALLED_PRODUCT = NO; }; name = Debug; }; @@ -2473,6 +2474,7 @@ "-Wno-inconsistent-missing-override", ); SDKROOT = macosx; + STRIP_INSTALLED_PRODUCT = NO; }; name = Release; }; diff --git a/ePub3/utilities/future.cpp b/ePub3/utilities/future.cpp index 625f0f524..2afd6909e 100644 --- a/ePub3/utilities/future.cpp +++ b/ePub3/utilities/future.cpp @@ -127,7 +127,7 @@ std::vector<__shared_state_base::_ContinuationPtrType>& __shared_state_base::__at_thread_exit() { typedef std::vector<_ContinuationPtrType> _VecType; -#if EPUB_COMPILER_SUPPORTS(CXX_THREAD_LOCAL) +#if EPUB_COMPILER_SUPPORTS(CXX_THREAD_LOCAL) && !EPUB_OS(IOS) static thread_local _VecType __vec; return __vec; #elif EPUB_COMPILER(MSVC) From ae7c0b85a4274886c7545283571ea37610ec39c1 Mon Sep 17 00:00:00 2001 From: Daniel Weck Date: Thu, 15 Dec 2016 18:48:14 +0000 Subject: [PATCH 6/6] Eliminate risk of XXE Xml External Entity malicious use https://github.com/readium/readium-sdk/issues/269 --- ePub3/ePub/archive_xml.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ePub3/ePub/archive_xml.cpp b/ePub3/ePub/archive_xml.cpp index 497e67fc2..311c9b3e5 100644 --- a/ePub3/ePub/archive_xml.cpp +++ b/ePub3/ePub/archive_xml.cpp @@ -25,7 +25,7 @@ EPUB3_BEGIN_NAMESPACE -const int ArchiveXmlReader::DEFAULT_OPTIONS = XML_PARSE_RECOVER | XML_PARSE_NOENT | XML_PARSE_DTDATTR | XML_PARSE_NONET; +const int ArchiveXmlReader::DEFAULT_OPTIONS = XML_PARSE_RECOVER | XML_PARSE_DTDATTR | XML_PARSE_NONET; ArchiveXmlReader::ArchiveXmlReader(ArchiveReader * r) : _reader(r) {