diff --git a/Jamfile b/Jamfile index bcf7ee6e..613957c0 100644 --- a/Jamfile +++ b/Jamfile @@ -20,4 +20,3 @@ constant c11-requires : ; build-project test ; -build-project example ; diff --git a/doc/qbk/0.main.qbk b/doc/qbk/0.main.qbk index b2899390..77311888 100644 --- a/doc/qbk/0.main.qbk +++ b/doc/qbk/0.main.qbk @@ -45,7 +45,7 @@ [def __MoveConstructible__ [@https://en.cppreference.com/w/cpp/named_req/MoveConstructible ['MoveConstructible]]] [def __SemiRegular__ [@https://en.cppreference.com/w/cpp/concepts/semiregular ['SemiRegular]]] [def __Swappable__ [@https://en.cppreference.com/w/cpp/named_req/Swappable ['Swappable]]] -[def __CharSet__ [link url.grammar.charset ['CharSet]]] +[def __CharSet__ [link url.grammars_rules.charset ['CharSet]]] [def __std_swap__ [@https://en.cppreference.com/w/cpp/algorithm/swap `std::swap`]] [def __authority_view__ [link url.ref.boost__urls__authority_view `authority_view`]] @@ -88,12 +88,10 @@ [include 4.0.modifying.qbk] -[section Allocators] -[endsect] - +[section Grammars Rules] [include 5.0.grammars.qbk] - -[section Examples] +[include 5.1.customization.qbk] +[include 5.2.CharSet.qbk] [endsect] [section:ref Reference] diff --git a/doc/qbk/5.0.grammars.qbk b/doc/qbk/5.0.grammars.qbk index 80fa54d9..d49aaabb 100644 --- a/doc/qbk/5.0.grammars.qbk +++ b/doc/qbk/5.0.grammars.qbk @@ -8,9 +8,7 @@ Official repository: https://github.com/CPPAlliance/url ] -[section Grammar] - -[heading Design of grammar rules] +[section Design] The function [link url.ref.boost__urls__grammar__parse `parse`] implements the logic for parsing strings according to grammar rules. @@ -18,14 +16,65 @@ according to grammar rules. A grammar rule type, henceforth called a "rule", provides an algorithm for parsing an input string. An instance of the rule is used to store the results. 
-[heading Customization points] +[table [[Code][Output]] [[ +[c++] +[snippet_parse_1] +][ +[teletype] +``` + scheme: http + suffix: :after_scheme +``` +]]] + +In this example, the function [link url.ref.boost__urls__grammar__parse `parse`] +returns `true` if the specified range of characters begins with a scheme. When +the operation completes successfully, the rule instance holds the results. + +The iterator is updated to the position where the rule ended, leaving the suffix +at the range between the new iterator and the old end iterator. This behavior is +useful when parsing a sequence of rules. + +[table [[Code][Output]] [[ +[c++] +[snippet_parse_2] +][ +[teletype] +``` + query: ?key=value + fragment: anchor +``` +]]] + +Parsing a sequence of rules is such a common pattern that a special overload is +provided: + +[table [[Code][Output]] [[ +[c++] +[snippet_parse_3] +][ +[teletype] +``` + query: ?key=value + fragment: anchor +``` +]]] -Users can define a free function `parse` as a customization point defining how to parse their -grammar rules as part of the same architecture that might include arbitrary grammar rules in expressions. +If all the logic has been represented in a single rule, we often want to parse +a complete string as a rule. -These new function overloads may be defined in other namespaces. As with __std_swap__, the design relies -on [@https://en.cppreference.com/w/cpp/language/adl argument-dependent lookup] to find these overloads. +[table [[Code][Output]] [[ +[c++] +[snippet_parse_4] +][ +[teletype] +``` + scheme: http + host: www.boost.org +``` +]]] -[include CharSet.qbk] +The function [link url.ref.boost__urls__grammar__parse_string `parse_string`] only returns +true when the whole string matches the rule. 
[endsect] diff --git a/doc/qbk/5.1.customization.qbk b/doc/qbk/5.1.customization.qbk new file mode 100644 index 00000000..7e8830e3 --- /dev/null +++ b/doc/qbk/5.1.customization.qbk @@ -0,0 +1,45 @@ +[/ + Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) + Copyright (c) 2021 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/CPPAlliance/url +] + +[section Customization points] + +Users can define customization points that define the logic to parse and store the results of +grammar rules as part of the same library architecture. + +This allows arbitrary grammar logic in expressions that interact with the existing rules. +Some use cases could include alternative or extended syntax for URLs and their components. + +These new function overloads may be defined in other namespaces with the `tag_invoke` +customization point. + +[snippet_customization_1] + +The function [link url.ref.boost__urls__grammar__parse `parse`] relies on +[@https://en.cppreference.com/w/cpp/language/adl argument-dependent lookup] to find these function +overloads with the appropriate tag [link url.ref.boost__urls__grammar__parse_tag `grammar::parse_tag`]. + +At this point, the new rule can interact with existing rules in any of the parsing functions: + +[table [[Code][Output]] [[ +[c++] +[snippet_customization_2] +][ +[teletype] +``` + scheme: http + lower: somelowercase +``` +]]] + + + + + +[endsect] diff --git a/doc/qbk/CharSet.qbk b/doc/qbk/5.2.CharSet.qbk similarity index 83% rename from doc/qbk/CharSet.qbk rename to doc/qbk/5.2.CharSet.qbk index 09ce87d3..53ccf1d5 100644 --- a/doc/qbk/CharSet.qbk +++ b/doc/qbk/5.2.CharSet.qbk @@ -12,10 +12,31 @@ A ['CharSet] is a unary predicate which accepts as its single argument a value of type `char`. 
The return value of the predicate is a `bool` whose value is true if the character is a member of the -notional character set, or false otherwise. A character set can be -used to specify which characters are unreserved and thus, do not -need to be escaped when used in percent-encoding algorithms. -Character sets may also be used by parsers; some character sets +notional character set, or false otherwise. + +[snippet_charset_1] + +The library provides a number of ['CharSet] predicates related to +URL components. + +[snippet_charset_2] + +Character sets are used to specify which characters are unreserved +in grammar rules. In URLs, they determine which characters do not +need to be escaped in percent-encoding algorithms. + +[table [[Code][Output]] [[ +[c++] +[snippet_charset_3] +][ +[teletype] +``` + query: key=the%20value + decoded size: 13 +``` +]]] + +Character sets may also be used directly by parsers; some character sets have optimized implementations for finding matching elements. 
[heading Related Identifiers] @@ -90,26 +111,7 @@ In this table: [heading Exemplar] -``` -struct CharSet -{ - bool operator()( char c ) const noexcept; - - char const* find_if ( char const* first, char const* last ) const noexcept; - char const* find_if_not ( char const* first, char const* last ) const noexcept; -}; -``` - -[heading Example] -``` -struct digit_chars_t -{ - constexpr bool operator()( char c ) const noexcept - { - return c >= '0' && c <= '9'; - } -}; -``` +[snippet_charset_4] [heading Models] diff --git a/doc/qbk/quickref.xml b/doc/qbk/quickref.xml index b52d54d9..b9f82d78 100644 --- a/doc/qbk/quickref.xml +++ b/doc/qbk/quickref.xml @@ -106,7 +106,7 @@ Concepts - CharSet + CharSet diff --git a/test/unit/snippets.cpp b/test/unit/snippets.cpp index 4011cfa2..cca51654 100644 --- a/test/unit/snippets.cpp +++ b/test/unit/snippets.cpp @@ -13,13 +13,8 @@ #include //] -#if 0 -//[snippet_headers_2 -#include -//] -#endif - #include +#include //[snippet_headers_3 #include @@ -856,6 +851,161 @@ using_modifying() } } +void +grammar_parse() +{ + { + //[snippet_parse_1 + string_view s = "http:after_scheme"; + scheme_rule r; + const char* it = s.begin(); + error_code ec; + if (grammar::parse(it, s.end(), ec, r)) + { + std::cout << "scheme: " << r.scheme << '\n'; + std::cout << "suffix: " << it << '\n'; + } + //] + } + + { + //[snippet_parse_2 + string_view s = "?key=value#anchor"; + query_part_rule r1; + fragment_part_rule r2; + const char* it = s.begin(); + error_code ec; + if (grammar::parse(it, s.end(), ec, r1)) + { + if (grammar::parse(it, s.end(), ec, r2)) + { + std::cout << "query: " << r1.query_part << '\n'; + std::cout << "fragment: " << r2.fragment.str << '\n'; + } + } + //] + } + + { + //[snippet_parse_3 + string_view s = "?key=value#anchor"; + query_part_rule r1; + fragment_part_rule r2; + const char* it = s.begin(); + error_code ec; + if (grammar::parse(it, s.end(), ec, r1, r2)) + { + std::cout << "query: " << r1.query_part << '\n'; + std::cout << 
"fragment: " << r2.fragment.str << '\n'; + } + //] + } + + { + //[snippet_parse_4 + string_view s = "http://www.boost.org"; + uri_rule r; + error_code ec; + if (grammar::parse_string(s, ec, r)) + { + std::cout << "scheme: " << r.scheme_part.scheme << '\n'; + std::cout << "host: " << r.hier_part.authority.host.host_part << '\n'; + } + //] + } +} + +//[snippet_customization_1 +struct lowercase_rule +{ + string_view str; + + friend + void + tag_invoke( + grammar::parse_tag const&, + char const*& it, + char const* const end, + error_code& ec, + lowercase_rule& t) noexcept + { + ec = {}; + char const* begin = it; + while (it != end && std::islower(static_cast<unsigned char>(*it))) // cast: passing a negative char to std::islower is undefined behavior + { + ++it; + } + t.str = string_view(begin, it); + } +}; +//] + +void +grammar_customization() +{ + { + //[snippet_customization_2 + string_view s = "http:somelowercase"; + scheme_rule r1; + lowercase_rule r2; + error_code ec; + if (grammar::parse_string(s, ec, r1, ':', r2)) + { + std::cout << "scheme: " << r1.scheme << '\n'; + std::cout << "lower: " << r2.str << '\n'; + } + //] + } +} + +//[snippet_charset_1 +struct digit_chars_t +{ + constexpr + bool + operator()( char c ) const noexcept + { + return c >= '0' && c <= '9'; + } +}; +//] + +//[snippet_charset_4 +struct CharSet +{ + bool operator()( char c ) const noexcept; + + char const* find_if ( char const* first, char const* last ) const noexcept; + char const* find_if_not ( char const* first, char const* last ) const noexcept; +}; +//] + + +void +grammar_charset() +{ + { + //[snippet_charset_2 + query_chars_t cs; + assert(cs('a')); + assert(cs('=')); + assert(!cs('#')); + //] + } + { + //[snippet_charset_3 + string_view s = "key=the%20value"; + pct_encoded_rule r; + error_code ec; + if (grammar::parse_string(s, ec, r)) + { + std::cout << "query: " << r.s.str << '\n'; + std::cout << "decoded size: " << r.s.decoded_size << '\n'; + } + //] + } +} + namespace boost { namespace urls { @@ -873,7 +1023,9 @@ class snippets_test parsing_path(); parsing_query(); 
parsing_fragment(); - using_modifying(); + grammar_parse(); + grammar_customization(); + grammar_charset(); BOOST_TEST_PASS(); }