Line data Source code
1 : // Copyright (c) 2009-2010 Satoshi Nakamoto
2 : // Copyright (c) 2009-2022 The Bitcoin Core developers
3 : // Distributed under the MIT software license, see the accompanying
4 : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
5 :
6 : /**
7 : * Utilities for converting data from/to strings.
8 : */
9 : #ifndef BITCOIN_UTIL_STRENCODINGS_H
10 : #define BITCOIN_UTIL_STRENCODINGS_H
11 :
12 : #include <span.h>
13 : #include <util/string.h>
14 :
15 : #include <charconv>
16 : #include <cstddef>
17 : #include <cstdint>
18 : #include <limits>
19 : #include <optional>
20 : #include <string> // IWYU pragma: export
21 : #include <string_view> // IWYU pragma: export
22 : #include <system_error>
23 : #include <type_traits>
24 : #include <vector>
25 :
/**
 * Selects which set of safe characters SanitizeString() applies.
 * Enumerators are numbered explicitly; the values are the same ones a plain
 * enum would assign implicitly (0, 1, 2, 3).
 */
enum SafeChars
{
    SAFE_CHARS_DEFAULT = 0,    //!< The full set of allowed chars
    SAFE_CHARS_UA_COMMENT = 1, //!< BIP-0014 subset
    SAFE_CHARS_FILENAME = 2,   //!< Chars allowed in filenames
    SAFE_CHARS_URI = 3,        //!< Chars allowed in URIs (RFC 3986)
};
34 :
/**
 * Unit multipliers used by ParseByteUnits().
 * Lowercase enumerators are powers of 1000 (written as decimal literals),
 * uppercase enumerators are powers of 1024 (written as shifts).
 */
enum class ByteUnit : uint64_t {
    NOOP = 1ULL,
    k = 1'000ULL,
    K = 1ULL << 10,
    m = 1'000'000ULL,
    M = 1ULL << 20,
    g = 1'000'000'000ULL,
    G = 1ULL << 30,
    t = 1'000'000'000'000ULL,
    T = 1ULL << 40,
};
51 :
52 : /**
53 : * Remove unsafe chars. Safe chars chosen to allow simple messages/URLs/email
54 : * addresses, but avoid anything even possibly remotely dangerous like & or >
55 : * @param[in] str The string to sanitize
56 : * @param[in] rule The set of safe chars to choose (default: least restrictive)
57 : * @return A new string without unsafe chars
58 : */
59 : std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
/**
 * Decode a hex string into bytes (uint8_t or std::byte). Whitespace is
 * ignored. Yields std::nullopt when the input is invalid.
 */
template <typename Byte = std::byte>
std::optional<std::vector<Byte>> TryParseHex(std::string_view str);

/** Same as TryParseHex, except that invalid input produces an empty vector. */
template <typename Byte = uint8_t>
std::vector<Byte> ParseHex(std::string_view hex_str)
{
    std::vector<Byte> bytes;
    if (auto parsed = TryParseHex<Byte>(hex_str)) {
        // Take over the decoded buffer instead of copying it.
        bytes.swap(*parsed);
    }
    return bytes;
}
69 : signed char HexDigit(char c);
70 : /* Returns true if each character in str is a hex character, and str has an
71 : * even number of hex digits. */
72 : bool IsHex(std::string_view str);
73 : /**
74 : * Return true if the string is a hex number, optionally prefixed with "0x"
75 : */
76 : bool IsHexNumber(std::string_view str);
77 : std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str);
78 : std::string EncodeBase64(Span<const unsigned char> input);
79 0 : inline std::string EncodeBase64(Span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); }
80 0 : inline std::string EncodeBase64(std::string_view str) { return EncodeBase64(MakeUCharSpan(str)); }
81 : std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str);
82 :
83 : /**
84 : * Base32 encode.
85 : * If `pad` is true, then the output will be padded with '=' so that its length
86 : * is a multiple of 8.
87 : */
88 : std::string EncodeBase32(Span<const unsigned char> input, bool pad = true);
89 :
90 : /**
91 : * Base32 encode.
92 : * If `pad` is true, then the output will be padded with '=' so that its length
93 : * is a multiple of 8.
94 : */
95 : std::string EncodeBase32(std::string_view str, bool pad = true);
96 :
97 : /**
98 : * Splits socket address string into host string and port value.
99 : * Validates port value.
100 : *
101 : * @param[in] in The socket address string to split.
102 : * @param[out] portOut Port-portion of the input, if found and parsable.
103 : * @param[out] hostOut Host-portion of the input, if found.
104 : * @return true if port-portion is absent or within its allowed range, otherwise false
105 : */
106 : bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut);
107 :
108 : // LocaleIndependentAtoi is provided for backwards compatibility reasons.
109 : //
110 : // New code should use ToIntegral or the ParseInt* functions
111 : // which provide parse error feedback.
112 : //
113 : // The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
114 : // std::atoi as it behaves under the "C" locale, and remove some undefined
115 : // behavior. If the parsed value is bigger than the integer type's maximum
116 : // value, or smaller than the integer type's minimum value, std::atoi has
117 : // undefined behavior, while this function returns the maximum or minimum
118 : // values, respectively.
119 : template <typename T>
120 1 : T LocaleIndependentAtoi(std::string_view str)
121 : {
122 : static_assert(std::is_integral<T>::value);
123 : T result;
124 : // Emulate atoi(...) handling of white space and leading +/-.
125 1 : std::string_view s = TrimStringView(str);
126 1 : if (!s.empty() && s[0] == '+') {
127 0 : if (s.length() >= 2 && s[1] == '-') {
128 0 : return 0;
129 : }
130 0 : s = s.substr(1);
131 0 : }
132 1 : auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
133 1 : if (error_condition == std::errc::result_out_of_range) {
134 0 : if (s.length() >= 1 && s[0] == '-') {
135 : // Saturate underflow, per strtoll's behavior.
136 0 : return std::numeric_limits<T>::min();
137 : } else {
138 : // Saturate overflow, per strtoll's behavior.
139 0 : return std::numeric_limits<T>::max();
140 : }
141 1 : } else if (error_condition != std::errc{}) {
142 0 : return 0;
143 : }
144 1 : return result;
145 1 : }
146 :
/**
 * Checks whether a character is one of the decimal digits '0' through '9'.
 * @param[in] c character to test
 * @return true if the argument is a decimal digit; otherwise false.
 */
constexpr bool IsDigit(char c)
{
    return !(c < '0' || c > '9');
}
156 :
/**
 * Checks whether a character is whitespace, i.e. one of: space, form-feed
 * ('\f'), newline ('\n'), carriage return ('\r'), horizontal tab ('\t') or
 * vertical tab ('\v').
 *
 * The result does not depend on the locale. Under the C locale it matches
 * std::isspace.
 *
 * @param[in] c character to test
 * @return true if the argument is a whitespace character; otherwise false
 */
constexpr inline bool IsSpace(char c) noexcept
{
    switch (c) {
    case ' ':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
    case '\v':
        return true;
    default:
        return false;
    }
}
171 :
/**
 * Parse a string as a value of integral type T. Parsing fails on leading
 * whitespace, on a leading +, and on any trailing character. As a regex the
 * accepted format is `-?[0-9]+`, where the minus sign is only allowed when T
 * is a signed type.
 *
 * @returns the parsed value, or std::nullopt if the whole string could not be
 *          parsed or the value is outside the range representable by T.
 */
template <typename T>
std::optional<T> ToIntegral(std::string_view str)
{
    static_assert(std::is_integral<T>::value);

    const char* const begin = str.data();
    const char* const end = begin + str.size();

    T value;
    const std::from_chars_result outcome = std::from_chars(begin, end, value);

    // Success requires both an error-free parse and that every character was consumed.
    const bool consumed_everything = outcome.ptr == end;
    if (outcome.ec == std::errc{} && consumed_everything) {
        return value;
    }
    return std::nullopt;
}
191 :
192 : /**
193 : * Convert string to signed 32-bit integer with strict parse error feedback.
194 : * @returns true if the entire string could be parsed as valid integer,
195 : * false if the entire string could not be parsed or if overflow or underflow occurred.
196 : */
197 : [[nodiscard]] bool ParseInt32(std::string_view str, int32_t *out);
198 :
199 : /**
200 : * Convert string to signed 64-bit integer with strict parse error feedback.
201 : * @returns true if the entire string could be parsed as valid integer,
202 : * false if the entire string could not be parsed or if overflow or underflow occurred.
203 : */
204 : [[nodiscard]] bool ParseInt64(std::string_view str, int64_t *out);
205 :
206 : /**
207 : * Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
208 : * @returns true if the entire string could be parsed as valid integer,
209 : * false if the entire string could not be parsed or if overflow or underflow occurred.
210 : */
211 : [[nodiscard]] bool ParseUInt8(std::string_view str, uint8_t *out);
212 :
213 : /**
214 : * Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
215 : * @returns true if the entire string could be parsed as valid integer,
216 : * false if the entire string could not be parsed or if overflow or underflow occurred.
217 : */
218 : [[nodiscard]] bool ParseUInt16(std::string_view str, uint16_t* out);
219 :
220 : /**
221 : * Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
222 : * @returns true if the entire string could be parsed as valid integer,
223 : * false if the entire string could not be parsed or if overflow or underflow occurred.
224 : */
225 : [[nodiscard]] bool ParseUInt32(std::string_view str, uint32_t *out);
226 :
227 : /**
228 : * Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
229 : * @returns true if the entire string could be parsed as valid integer,
230 : * false if the entire string could not be parsed or if overflow or underflow occurred.
231 : */
232 : [[nodiscard]] bool ParseUInt64(std::string_view str, uint64_t *out);
233 :
234 : /**
235 : * Convert a span of bytes to a lower-case hexadecimal string.
236 : */
237 : std::string HexStr(const Span<const uint8_t> s);
238 0 : inline std::string HexStr(const Span<const char> s) { return HexStr(MakeUCharSpan(s)); }
239 0 : inline std::string HexStr(const Span<const std::byte> s) { return HexStr(MakeUCharSpan(s)); }
240 :
241 : /**
242 : * Format a paragraph of text to a fixed width, adding spaces for
243 : * indentation to any added line.
244 : */
245 : std::string FormatParagraph(std::string_view in, size_t width = 79, size_t indent = 0);
246 :
/**
 * Timing-attack-resistant comparison.
 *
 * All elements of `a` are always visited, so the running time is proportional
 * to the length of the first argument. Differences (including a length
 * mismatch) are OR-ed into an accumulator rather than causing an early exit.
 *
 * @return true if both arguments have the same size and equal elements.
 */
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
    const size_t len_a = a.size();
    const size_t len_b = b.size();

    // An empty `b` can only equal an empty `a`; this also keeps the modulo below well-defined.
    if (len_b == 0) {
        return len_a == 0;
    }

    size_t difference = len_a ^ len_b;
    for (size_t pos = 0; pos != len_a; ++pos) {
        // Index `b` modulo its length so a shorter `b` is never read out of bounds.
        difference |= size_t(a[pos] ^ b[pos % len_b]);
    }
    return difference == 0;
}
261 :
262 : /** Parse number as fixed point according to JSON number syntax.
263 : * See https://json.org/number.gif
264 : * @returns true on success, false on error.
265 : * @note The result must be in the range (-10^18,10^18), otherwise an overflow error will trigger.
266 : */
267 : [[nodiscard]] bool ParseFixedPoint(std::string_view, int decimals, int64_t *amount_out);
268 :
/**
 * Default `infn` for ConvertBits: hands every input value back unchanged.
 *
 * Declared at namespace scope rather than inside an unnamed namespace, so that
 * every translation unit including this header refers to the same type (and
 * thus to the same ConvertBits specializations).
 */
struct IntIdentity
{
    constexpr int operator()(int x) const noexcept { return x; }
};

/**
 * Convert from one power-of-2 number base to another.
 *
 * Reads values of `frombits` bits each from [it, end) and emits values of
 * `tobits` bits each through `outfn`.
 *
 * @tparam frombits  Width in bits of each input value (must be positive).
 * @tparam tobits    Width in bits of each output value (must be positive).
 * @tparam pad       If true, left-over bits are zero-padded on the right and
 *                   emitted as a final value. If false, left-over bits must be
 *                   fewer than `frombits` and all zero, otherwise the
 *                   conversion fails.
 * @param[in] outfn  Callable invoked with each output value.
 * @param[in] it     Start of the input range.
 * @param[in] end    End of the input range.
 * @param[in] infn   Callable mapping each input element to an int; a negative
 *                   result marks the element as invalid.
 * @return false if `infn` returned a negative value or (when `pad` is false)
 *         the trailing bits were invalid; true otherwise. Values already
 *         passed to `outfn` before a failure are not rolled back.
 */
template<int frombits, int tobits, bool pad, typename O, typename It, typename I = IntIdentity>
bool ConvertBits(O outfn, It it, It end, I infn = {}) {
    static_assert(frombits > 0 && tobits > 0, "bit widths must be positive");
    static_assert(frombits + tobits - 1 < std::numeric_limits<size_t>::digits,
                  "accumulator (size_t) is too narrow for these bit widths");

    constexpr size_t from_width = frombits;
    constexpr size_t to_width = tobits;
    // Shift a size_t (not an int) so that wide bit counts do not overflow.
    constexpr size_t maxv = (size_t{1} << to_width) - 1;
    constexpr size_t max_acc = (size_t{1} << (from_width + to_width - 1)) - 1;

    size_t acc = 0;
    size_t bits = 0;
    while (it != end) {
        const int v = infn(*it);
        if (v < 0) return false;
        // Append the new value and discard bits that have already been emitted.
        acc = ((acc << from_width) | static_cast<size_t>(v)) & max_acc;
        bits += from_width;
        while (bits >= to_width) {
            bits -= to_width;
            outfn((acc >> bits) & maxv);
        }
        ++it;
    }
    if constexpr (pad) {
        if (bits) outfn((acc << (to_width - bits)) & maxv);
    } else if (bits >= from_width || ((acc << (to_width - bits)) & maxv)) {
        return false;
    }
    return true;
}
303 :
/**
 * Maps an uppercase character to its lowercase counterpart.
 * The conversion is locale independent and only affects uppercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] c the character to convert to lowercase.
 * @return the lowercase equivalent of c; or the argument
 *         if no conversion is possible.
 */
constexpr char ToLower(char c)
{
    if (c < 'A' || c > 'Z') {
        return c;
    }
    return static_cast<char>((c - 'A') + 'a');
}
318 :
319 : /**
320 : * Returns the lowercase equivalent of the given string.
321 : * This function is locale independent. It only converts uppercase
322 : * characters in the standard 7-bit ASCII range.
323 : * This is a feature, not a limitation.
324 : *
325 : * @param[in] str the string to convert to lowercase.
326 : * @returns lowercased equivalent of str
327 : */
328 : std::string ToLower(std::string_view str);
329 :
/**
 * Maps a lowercase character to its uppercase counterpart.
 * The conversion is locale independent and only affects lowercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] c the character to convert to uppercase.
 * @return the uppercase equivalent of c; or the argument
 *         if no conversion is possible.
 */
constexpr char ToUpper(char c)
{
    if (c < 'a' || c > 'z') {
        return c;
    }
    return static_cast<char>((c - 'a') + 'A');
}
344 :
345 : /**
346 : * Returns the uppercase equivalent of the given string.
347 : * This function is locale independent. It only converts lowercase
348 : * characters in the standard 7-bit ASCII range.
349 : * This is a feature, not a limitation.
350 : *
351 : * @param[in] str the string to convert to uppercase.
352 : * @returns UPPERCASED EQUIVALENT OF str
353 : */
354 : std::string ToUpper(std::string_view str);
355 :
356 : /**
357 : * Capitalizes the first character of the given string.
358 : * This function is locale independent. It only converts lowercase
359 : * characters in the standard 7-bit ASCII range.
360 : * This is a feature, not a limitation.
361 : *
362 : * @param[in] str the string to capitalize.
363 : * @returns string with the first letter capitalized.
364 : */
365 : std::string Capitalize(std::string str);
366 :
367 : /**
368 : * Parse a string with suffix unit [k|K|m|M|g|G|t|T].
369 : * Must be a whole integer, fractions not allowed (0.5t), no whitespace or +-
370 : * Lowercase units are 1000 base. Uppercase units are 1024 base.
371 : * Examples: 2m,27M,19g,41T
372 : *
373 : * @param[in] str the string to convert into bytes
374 : * @param[in] default_multiplier if no unit is found in str use this unit
375 : * @returns optional uint64_t bytes from str or nullopt
376 : * if ToIntegral fails, str is empty, str has trailing whitespace, or the result overflows
377 : */
378 : std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
379 :
380 : #endif // BITCOIN_UTIL_STRENCODINGS_H
|