uri.hpp 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. #ifndef REALM_UTIL_URI_HPP
  2. #define REALM_UTIL_URI_HPP
  3. #include <string>
  4. namespace realm {
  5. namespace util {
  6. /// \brief A decomposed URI reference.
  7. ///
  8. /// A Uri object contains a URI reference decomposed into its 5 main component
  9. /// parts (scheme, authority, path, query, and fragment identifier).
  10. ///
  11. /// The decomposition process (as carried out by the constructor) performs a
  12. /// maximally lenient parsing of the specified URI reference. It does that
  13. /// according to the following regular expression (copied verbatimly from
  14. /// http://tools.ietf.org/html/rfc3986#appendix-B):
  15. ///
  16. /// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  17. /// 12 3 4 5 6 7 8 9
  18. ///
  19. /// Group
  20. /// ------------------------
  21. /// 1 Scheme part
  22. /// 3 Authority part
  23. /// 5 Path part
  24. /// 6 Query part
  25. /// 8 Fragment identifier part
  26. ///
  27. /// NOTE: Since this regular expression maches every string, every string is
  28. /// decomposable.
  29. ///
  30. /// NOTE: This class does not attempt to perform any level of validation of URI
  31. /// references against the grammer specified in the RFC. Such validation could
  32. /// be added later, for example through a new `Uri::validate()`.
  33. ///
  34. /// For example, the decomposition of
  35. /// "http://www.ietf.org/rfc/rfc2396.txt?foo=bar#chp3" is:
  36. ///
  37. /// <pre>
  38. ///
  39. /// scheme -> "http:"
  40. /// auth -> "//www.ietf.org"
  41. /// path -> "/rfc/rfc2396.txt"
  42. /// query -> "?foo=bar"
  43. /// frag -> "#chp3"
  44. ///
  45. /// </pre>
  46. ///
  47. /// This class also provides recomposition of a URI references from their
  48. /// component parts, where the parts can be specified individually, or be a
  49. /// result of URI resoultion.
  50. ///
  51. /// It is important to understand, however, that certain restrictions need to
  52. /// apply to each component part in order that the URI reference as a whole is
  53. /// self consistent. More concretely, it is necessary to require that the
  54. /// component parts at any time must have values that will be preserved across a
  55. /// recomposition -> decomposition cycle.
  56. ///
  57. /// The actual restrictions on each component part is specified for the
  58. /// corresponding setter-method (e.g., set_scheme()).
  59. ///
  60. /// Note that component parts resulting from decomposition, canonicalize, or
  61. /// from resolution (resolve()) will automatically (by design of the underlying
  62. /// algorithm) adhere to these rules.
  63. ///
  64. /// Decomposition, recomposition, conanonicalization, and resolution algorithms
  65. /// are taken from RFC 3986.
  66. ///
  67. /// \sa http://tools.ietf.org/html/rfc3986
  68. class Uri {
  69. public:
  70. Uri();
  71. /// Decompose the specified URI reference into its five main parts.
  72. Uri(const std::string&);
  73. /// Reconstruct a URI reference from its 5 components.
  74. std::string recompose() const;
  75. /*
  76. /// Resolve this URI reference against the specified base URI reference
  77. /// according to the rules described in section 5.2 of RFC 3986.
  78. ///
  79. /// Be aware that a fragment identifier on the base URI reference is never
  80. /// carried over to the result. This is in accordance with the RFC.
  81. void resolve(const Uri& base, bool strict = true);
  82. */
  83. /// Remove empty URI components. Also, for URI references having either a
  84. /// scheme part or an authority part, replace an absent path with "/".
  85. void canonicalize();
  86. /// Get the scheme part of this URI reference including the trailing ":", or
  87. /// the empty tring if there is no scheme part.
  88. const std::string& get_scheme() const;
  89. /// Get the authority part of this URI reference including the leading "//",
  90. /// or the empty tring if there is no authority part.
  91. const std::string& get_auth() const;
  92. /// Same as get_auth() (with no arguments), but parse the authority component
  93. /// into userinfo, host, and port subcomponents.
  94. ///
  95. /// \return True if, and only if the authority component was present (i.e.,
  96. /// not the empty string). When false is returned, none of the specified
  97. /// strings will have been modified.
  98. bool get_auth(std::string& userinfo, std::string& host, std::string& port) const;
  99. /// Get the path part of this URI reference, or the empty tring if there is
  100. /// no path part.
  101. const std::string& get_path() const;
  102. /// Get the query part of this URI reference including the leading "?", or
  103. /// the empty tring if there is no query part.
  104. const std::string& get_query() const;
  105. /// Get the fragment identifier of this URI reference including the leading
  106. /// "#", or the empty tring if there is no fragment identifier.
  107. const std::string& get_frag() const;
  108. /// The specified string must either be empty or have a final ":". Also, it
  109. /// must not contain "/", "?", or "#", nor may it contain more than one ":".
  110. ///
  111. /// \throw std::invalid_argument If the specified string is not valid
  112. /// according to the specified rules.
  113. void set_scheme(const std::string&);
  114. /// The specified string must either be empty or have "//" as a
  115. /// prefix. Also, it must not contain "?" or "#", nor may it contain "/"
  116. /// beyond the first two.
  117. ///
  118. /// \throw std::invalid_argument If the specified string is not valid
  119. /// according to the specified rules.
  120. void set_auth(const std::string&);
  121. /// The specified string must not contain "?" or "#".
  122. ///
  123. /// \throw std::invalid_argument If the specified string is not valid
  124. /// according to the specified rules.
  125. void set_path(const std::string&);
  126. /// The specified string must either be empty or have a leading "?". Also,
  127. /// it must not contain "#".
  128. ///
  129. /// \throw std::invalid_argument If the specified string is not valid
  130. /// according to the specified rules.
  131. void set_query(const std::string&);
  132. /*
  133. /// Set the query string to the serialized form of the specified set of
  134. /// query parameters. This is slightly faster than set_query(q.encode())
  135. /// because it avoids the validity check on the string.
  136. void set_query(const Params&);
  137. */
  138. /// The specified string must either be empty or have a leading "#".
  139. ///
  140. /// \throw std::invalid_argument If the specified string is not valid
  141. /// according to the specified rules.
  142. void set_frag(const std::string&);
  143. bool is_absolute() const;
  144. private:
  145. std::string m_scheme, m_auth, m_path, m_query, m_frag;
  146. };
  147. /// uri_percent_encode() uri encodes a string as defined in according to
  148. /// https://tools.ietf.org/html/rfc3986#section-2.1
  149. /// The unescaped input must be UTF-8 encoded. uri_percent_encode() works
  150. /// by replacing each UTF-8 character by three charatcers.
  151. /// pct-encoded = "%" HEXDIG HEXDIG
  152. /// where HEXDIG HEXDIG is the hexadecimal value of the character.
  153. /// HEXDIG is a capital letter for A - F.
  154. /// Unreserved chracters are not encoded.
  155. /// unreseved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  156. ///
  157. /// uri_percent_decode() is the inverse of uri_percent_encode().
  158. /// uri_percent_decode() throws std::runtime_error if the input
  159. /// is invalid and cannot be decoded.
  160. std::string uri_percent_encode(const std::string& unescaped);
  161. std::string uri_percent_decode(const std::string& escaped);
  162. // Implementation
  163. inline Uri::Uri() {}
  164. inline std::string Uri::recompose() const
  165. {
  166. return m_scheme + m_auth + m_path + m_query + m_frag;
  167. }
  168. inline const std::string& Uri::get_scheme() const
  169. {
  170. return m_scheme;
  171. }
  172. inline const std::string& Uri::get_auth() const
  173. {
  174. return m_auth;
  175. }
  176. inline const std::string& Uri::get_path() const
  177. {
  178. return m_path;
  179. }
  180. inline const std::string& Uri::get_query() const
  181. {
  182. return m_query;
  183. }
  184. inline const std::string& Uri::get_frag() const
  185. {
  186. return m_frag;
  187. }
  188. inline bool Uri::is_absolute() const
  189. {
  190. return !m_scheme.empty();
  191. }
  192. } // namespace util
  193. } // namespace realm
  194. #endif // REALM_UTIL_URI_HPP