LCOV - code coverage report
Current view: top level - src/util - strencodings.h (source / functions) Hit Total Coverage
Test: fuzz_coverage.info Lines: 16 64 25.0 %
Date: 2023-09-26 12:08:55 Functions: 5 30 16.7 %

          Line data    Source code
       1             : // Copyright (c) 2009-2010 Satoshi Nakamoto
       2             : // Copyright (c) 2009-2022 The Bitcoin Core developers
       3             : // Distributed under the MIT software license, see the accompanying
       4             : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
       5             : 
       6             : /**
       7             :  * Utilities for converting data from/to strings.
       8             :  */
       9             : #ifndef BITCOIN_UTIL_STRENCODINGS_H
      10             : #define BITCOIN_UTIL_STRENCODINGS_H
      11             : 
      12             : #include <span.h>
      13             : #include <util/string.h>
      14             : 
      15             : #include <charconv>
      16             : #include <cstddef>
      17             : #include <cstdint>
      18             : #include <limits>
      19             : #include <optional>
      20             : #include <string>      // IWYU pragma: export
      21             : #include <string_view> // IWYU pragma: export
      22             : #include <system_error>
      23             : #include <type_traits>
      24             : #include <vector>
      25             : 
      26             : /** Used by SanitizeString() */
      27             : enum SafeChars
      28             : {
      29             :     SAFE_CHARS_DEFAULT, //!< The full set of allowed chars
      30             :     SAFE_CHARS_UA_COMMENT, //!< BIP-0014 subset
      31             :     SAFE_CHARS_FILENAME, //!< Chars allowed in filenames
      32             :     SAFE_CHARS_URI, //!< Chars allowed in URIs (RFC 3986)
      33             : };
      34             : 
      35             : /**
      36             :  * Used by ParseByteUnits()
      37             :  * Lowercase base 1000
      38             :  * Uppercase base 1024
      39             : */
      40             : enum class ByteUnit : uint64_t {
      41             :     NOOP = 1ULL,
      42             :     k = 1000ULL,
      43             :     K = 1024ULL,
      44             :     m = 1'000'000ULL,
      45             :     M = 1ULL << 20,
      46             :     g = 1'000'000'000ULL,
      47             :     G = 1ULL << 30,
      48             :     t = 1'000'000'000'000ULL,
      49             :     T = 1ULL << 40,
      50             : };
      51             : 
      52             : /**
      53             : * Remove unsafe chars. Safe chars chosen to allow simple messages/URLs/email
      54             : * addresses, but avoid anything even possibly remotely dangerous like & or >
      55             : * @param[in] str    The string to sanitize
      56             : * @param[in] rule   The set of safe chars to choose (default: least restrictive)
      57             : * @return           A new string without unsafe chars
      58             : */
      59             : std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
      60             : /** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. Returns nullopt on invalid input. */
      61             : template <typename Byte = std::byte>
      62             : std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
      63             : /** Like TryParseHex, but returns an empty vector on invalid input. */
      64             : template <typename Byte = uint8_t>
      65           6 : std::vector<Byte> ParseHex(std::string_view hex_str)
      66             : {
      67           6 :     return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{});
      68           0 : }
      69             : signed char HexDigit(char c);
      70             : /* Returns true if each character in str is a hex character, and str has an even
      71             :  * number of hex digits. */
      72             : bool IsHex(std::string_view str);
      73             : /**
      74             : * Return true if the string is a hex number, optionally prefixed with "0x"
      75             : */
      76             : bool IsHexNumber(std::string_view str);
      77             : std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str);
      78             : std::string EncodeBase64(Span<const unsigned char> input);
      79           0 : inline std::string EncodeBase64(Span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); }
      80           0 : inline std::string EncodeBase64(std::string_view str) { return EncodeBase64(MakeUCharSpan(str)); }
      81             : std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str);
      82             : 
      83             : /**
      84             :  * Base32 encode.
      85             :  * If `pad` is true, then the output will be padded with '=' so that its length
      86             :  * is a multiple of 8.
      87             :  */
      88             : std::string EncodeBase32(Span<const unsigned char> input, bool pad = true);
      89             : 
      90             : /**
      91             :  * Base32 encode.
      92             :  * If `pad` is true, then the output will be padded with '=' so that its length
      93             :  * is a multiple of 8.
      94             :  */
      95             : std::string EncodeBase32(std::string_view str, bool pad = true);
      96             : 
      97             : /**
      98             :  * Splits socket address string into host string and port value.
      99             :  * Validates port value.
     100             :  *
     101             :  * @param[in] in        The socket address string to split.
     102             :  * @param[out] portOut  Port-portion of the input, if found and parsable.
     103             :  * @param[out] hostOut  Host-portion of the input, if found.
     104             :  * @return              true if port-portion is absent or within its allowed range, otherwise false
     105             :  */
     106             : bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut);
     107             : 
     108             : // LocaleIndependentAtoi is provided for backwards compatibility reasons.
     109             : //
     110             : // New code should use ToIntegral or the ParseInt* functions
     111             : // which provide parse error feedback.
     112             : //
     113             : // The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
     114             : // std::atoi as it behaves under the "C" locale, and remove some undefined
     115             : // behavior. If the parsed value is bigger than the integer type's maximum
     116             : // value, or smaller than the integer type's minimum value, std::atoi has
     117             : // undefined behavior, while this function returns the maximum or minimum
     118             : // values, respectively.
     119             : template <typename T>
     120           1 : T LocaleIndependentAtoi(std::string_view str)
     121             : {
     122             :     static_assert(std::is_integral<T>::value);
     123             :     T result;
     124             :     // Emulate atoi(...) handling of white space and leading +/-.
     125           1 :     std::string_view s = TrimStringView(str);
     126           1 :     if (!s.empty() && s[0] == '+') {
     127           0 :         if (s.length() >= 2 && s[1] == '-') {
     128           0 :             return 0;
     129             :         }
     130           0 :         s = s.substr(1);
     131           0 :     }
     132           1 :     auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
     133           1 :     if (error_condition == std::errc::result_out_of_range) {
     134           0 :         if (s.length() >= 1 && s[0] == '-') {
     135             :             // Saturate underflow, per strtoll's behavior.
     136           0 :             return std::numeric_limits<T>::min();
     137             :         } else {
     138             :             // Saturate overflow, per strtoll's behavior.
     139           0 :             return std::numeric_limits<T>::max();
     140             :         }
     141           1 :     } else if (error_condition != std::errc{}) {
     142           0 :         return 0;
     143             :     }
     144           1 :     return result;
     145           1 : }
     146             : 
     147             : /**
     148             :  * Tests if the given character is a decimal digit.
     149             :  * @param[in] c     character to test
     150             :  * @return          true if the argument is a decimal digit; otherwise false.
     151             :  */
     152         773 : constexpr bool IsDigit(char c)
     153             : {
     154         773 :     return c >= '0' && c <= '9';
     155             : }
     156             : 
     157             : /**
     158             :  * Tests if the given character is a whitespace character. The whitespace characters
     159             :  * are: space, form-feed ('\f'), newline ('\n'), carriage return ('\r'), horizontal
     160             :  * tab ('\t'), and vertical tab ('\v').
     161             :  *
     162             :  * This function is locale independent. Under the C locale this function gives the
     163             :  * same result as std::isspace.
     164             :  *
     165             :  * @param[in] c     character to test
     166             :  * @return          true if the argument is a whitespace character; otherwise false
     167             :  */
     168         442 : constexpr inline bool IsSpace(char c) noexcept {
     169         442 :     return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
     170             : }
     171             : 
     172             : /**
     173             :  * Convert string to integral type T. Leading whitespace, a leading +, or any
     174             :  * trailing character fail the parsing. The required format expressed as regex
     175             :  * is `-?[0-9]+`. The minus sign is only permitted for signed integer types.
     176             :  *
     177             :  * @returns std::nullopt if the entire string could not be parsed, or if the
     178             :  *   parsed value is not in the range representable by the type T.
     179             :  */
     180             : template <typename T>
     181           0 : std::optional<T> ToIntegral(std::string_view str)
     182             : {
     183             :     static_assert(std::is_integral<T>::value);
     184             :     T result;
     185           0 :     const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result);
     186           0 :     if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) {
     187           0 :         return std::nullopt;
     188             :     }
     189           0 :     return result;
     190           0 : }
     191             : 
     192             : /**
     193             :  * Convert string to signed 32-bit integer with strict parse error feedback.
     194             :  * @returns true if the entire string could be parsed as valid integer,
     195             :  *   false if the entire string could not be parsed or if overflow or underflow occurred.
     196             :  */
     197             : [[nodiscard]] bool ParseInt32(std::string_view str, int32_t *out);
     198             : 
     199             : /**
     200             :  * Convert string to signed 64-bit integer with strict parse error feedback.
     201             :  * @returns true if the entire string could be parsed as valid integer,
     202             :  *   false if the entire string could not be parsed or if overflow or underflow occurred.
     203             :  */
     204             : [[nodiscard]] bool ParseInt64(std::string_view str, int64_t *out);
     205             : 
     206             : /**
     207             :  * Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
     208             :  * @returns true if the entire string could be parsed as valid integer,
     209             :  *   false if the entire string could not be parsed or if overflow or underflow occurred.
     210             :  */
     211             : [[nodiscard]] bool ParseUInt8(std::string_view str, uint8_t *out);
     212             : 
     213             : /**
     214             :  * Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
     215             :  * @returns true if the entire string could be parsed as valid integer,
     216             :  *   false if the entire string could not be parsed or if overflow or underflow occurred.
     217             :  */
     218             : [[nodiscard]] bool ParseUInt16(std::string_view str, uint16_t* out);
     219             : 
     220             : /**
     221             :  * Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
     222             :  * @returns true if the entire string could be parsed as valid integer,
     223             :  *   false if the entire string could not be parsed or if overflow or underflow occurred.
     224             :  */
     225             : [[nodiscard]] bool ParseUInt32(std::string_view str, uint32_t *out);
     226             : 
     227             : /**
     228             :  * Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
     229             :  * @returns true if the entire string could be parsed as valid integer,
     230             :  *   false if the entire string could not be parsed or if overflow or underflow occurred.
     231             :  */
     232             : [[nodiscard]] bool ParseUInt64(std::string_view str, uint64_t *out);
     233             : 
     234             : /**
     235             :  * Convert a span of bytes to a lower-case hexadecimal string.
     236             :  */
     237             : std::string HexStr(const Span<const uint8_t> s);
     238           0 : inline std::string HexStr(const Span<const char> s) { return HexStr(MakeUCharSpan(s)); }
     239           0 : inline std::string HexStr(const Span<const std::byte> s) { return HexStr(MakeUCharSpan(s)); }
     240             : 
     241             : /**
     242             :  * Format a paragraph of text to a fixed width, adding spaces for
     243             :  * indentation to any added line.
     244             :  */
     245             : std::string FormatParagraph(std::string_view in, size_t width = 79, size_t indent = 0);
     246             : 
     247             : /**
     248             :  * Timing-attack-resistant comparison.
     249             :  * Takes time proportional to length
     250             :  * of first argument.
     251             :  */
     252             : template <typename T>
     253           0 : bool TimingResistantEqual(const T& a, const T& b)
     254             : {
     255           0 :     if (b.size() == 0) return a.size() == 0;
     256           0 :     size_t accumulator = a.size() ^ b.size();
     257           0 :     for (size_t i = 0; i < a.size(); i++)
     258           0 :         accumulator |= size_t(a[i] ^ b[i%b.size()]);
     259           0 :     return accumulator == 0;
     260           0 : }
     261             : 
     262             : /** Parse number as fixed point according to JSON number syntax.
     263             :  * See https://json.org/number.gif
     264             :  * @returns true on success, false on error.
     265             :  * @note The result must be in the range (-10^18,10^18), otherwise an overflow error will trigger.
     266             :  */
     267             : [[nodiscard]] bool ParseFixedPoint(std::string_view, int decimals, int64_t *amount_out);
     268             : 
     269             : namespace {
     270             : /** Helper class for the default infn argument to ConvertBits (just returns the input). */
     271             : struct IntIdentity
     272             : {
     273           0 :     [[maybe_unused]] int operator()(int x) const { return x; }
     274             : };
     275             : 
     276             : } // namespace
     277             : 
     278             : /** Convert from one power-of-2 number base to another. */
     279             : template<int frombits, int tobits, bool pad, typename O, typename It, typename I = IntIdentity>
     280           0 : bool ConvertBits(O outfn, It it, It end, I infn = {}) {
     281           0 :     size_t acc = 0;
     282           0 :     size_t bits = 0;
     283           0 :     constexpr size_t maxv = (1 << tobits) - 1;
     284           0 :     constexpr size_t max_acc = (1 << (frombits + tobits - 1)) - 1;
     285           0 :     while (it != end) {
     286           0 :         int v = infn(*it);
     287           0 :         if (v < 0) return false;
     288           0 :         acc = ((acc << frombits) | v) & max_acc;
     289           0 :         bits += frombits;
     290           0 :         while (bits >= tobits) {
     291           0 :             bits -= tobits;
     292           0 :             outfn((acc >> bits) & maxv);
     293             :         }
     294           0 :         ++it;
     295             :     }
     296             :     if (pad) {
     297           0 :         if (bits) outfn((acc << (tobits - bits)) & maxv);
     298           0 :     } else if (bits >= frombits || ((acc << (tobits - bits)) & maxv)) {
     299           0 :         return false;
     300             :     }
     301           0 :     return true;
     302           0 : }
     303             : 
     304             : /**
     305             :  * Converts the given character to its lowercase equivalent.
     306             :  * This function is locale independent. It only converts uppercase
     307             :  * characters in the standard 7-bit ASCII range.
     308             :  * This is a feature, not a limitation.
     309             :  *
     310             :  * @param[in] c     the character to convert to lowercase.
     311             :  * @return          the lowercase equivalent of c; or the argument
     312             :  *                  if no conversion is possible.
     313             :  */
     314          44 : constexpr char ToLower(char c)
     315             : {
     316          44 :     return (c >= 'A' && c <= 'Z' ? (c - 'A') + 'a' : c);
     317             : }
     318             : 
     319             : /**
     320             :  * Returns the lowercase equivalent of the given string.
     321             :  * This function is locale independent. It only converts uppercase
     322             :  * characters in the standard 7-bit ASCII range.
     323             :  * This is a feature, not a limitation.
     324             :  *
     325             :  * @param[in] str   the string to convert to lowercase.
     326             :  * @returns         lowercased equivalent of str
     327             :  */
     328             : std::string ToLower(std::string_view str);
     329             : 
     330             : /**
     331             :  * Converts the given character to its uppercase equivalent.
     332             :  * This function is locale independent. It only converts lowercase
     333             :  * characters in the standard 7-bit ASCII range.
     334             :  * This is a feature, not a limitation.
     335             :  *
     336             :  * @param[in] c     the character to convert to uppercase.
     337             :  * @return          the uppercase equivalent of c; or the argument
     338             :  *                  if no conversion is possible.
     339             :  */
     340           0 : constexpr char ToUpper(char c)
     341             : {
     342           0 :     return (c >= 'a' && c <= 'z' ? (c - 'a') + 'A' : c);
     343             : }
     344             : 
     345             : /**
     346             :  * Returns the uppercase equivalent of the given string.
     347             :  * This function is locale independent. It only converts lowercase
     348             :  * characters in the standard 7-bit ASCII range.
     349             :  * This is a feature, not a limitation.
     350             :  *
     351             :  * @param[in] str   the string to convert to uppercase.
     352             :  * @returns         UPPERCASED EQUIVALENT OF str
     353             :  */
     354             : std::string ToUpper(std::string_view str);
     355             : 
     356             : /**
     357             :  * Capitalizes the first character of the given string.
     358             :  * This function is locale independent. It only converts lowercase
     359             :  * characters in the standard 7-bit ASCII range.
     360             :  * This is a feature, not a limitation.
     361             :  *
     362             :  * @param[in] str   the string to capitalize.
     363             :  * @returns         string with the first letter capitalized.
     364             :  */
     365             : std::string Capitalize(std::string str);
     366             : 
     367             : /**
     368             :  * Parse a string with suffix unit [k|K|m|M|g|G|t|T].
     369             :  * Must be a whole integer, fractions not allowed (0.5t), no whitespace or +-
     370             :  * Lowercase units are 1000 base. Uppercase units are 1024 base.
     371             :  * Examples: 2m,27M,19g,41T
     372             :  *
     373             :  * @param[in] str                  the string to convert into bytes
     374             :  * @param[in] default_multiplier   if no unit is found in str use this unit
     375             :  * @returns                        the number of bytes parsed from str, or std::nullopt
     376             :  *                                 if str is empty, ToIntegral fails, there is trailing whitespace, or the result overflows
     377             :  */
     378             : std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
     379             : 
     380             : #endif // BITCOIN_UTIL_STRENCODINGS_H

Generated by: LCOV version 1.14