LCOV - code coverage report
Current view: top level - src/test/fuzz - FuzzedDataProvider.h (source / functions) Hit Total Coverage
Test: fuzz_coverage.info Lines: 78 114 68.4 %
Date: 2023-09-26 12:08:55 Functions: 22 66 33.3 %

          Line data    Source code
       1             : //===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
       2             : //
       3             : // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
       4             : // See https://llvm.org/LICENSE.txt for license information.
       5             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
       6             : //
       7             : //===----------------------------------------------------------------------===//
       8             : // A single header library providing a utility class to break up an array of
       9             : // bytes. Whenever run on the same input, provides the same output, as long as
      10             : // its methods are called in the same order, with the same arguments.
      11             : //===----------------------------------------------------------------------===//
      12             : 
      13             : #ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
      14             : #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
      15             : 
      16             : #include <algorithm>
      17             : #include <array>
      18             : #include <climits>
      19             : #include <cstddef>
      20             : #include <cstdint>
      21             : #include <cstring>
      22             : #include <initializer_list>
      23             : #include <limits>
      24             : #include <string>
      25             : #include <type_traits>
      26             : #include <utility>
      27             : #include <vector>
      28             : 
      29             : // In addition to the comments below, the API is also briefly documented at
      30             : // https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
      31             : class FuzzedDataProvider {
      32             :  public:
      33             :   // |data| is an array of length |size| that the FuzzedDataProvider wraps to
      34             :   // provide more granular access. |data| must outlive the FuzzedDataProvider.
      35       49082 :   FuzzedDataProvider(const uint8_t *data, size_t size)
      36       49082 :       : data_ptr_(data), remaining_bytes_(size) {}
      37             :   ~FuzzedDataProvider() = default;
      38             : 
      39             :   // See the implementation below (after the class definition) for more verbose
      40             :   // comments for each of the methods.
      41             : 
      42             :   // Methods returning std::vector of bytes. These are the most popular choice
      43             :   // when splitting fuzzing input into pieces, as every piece is put into a
      44             :   // separate buffer (i.e. ASan would catch any under-/overflow) and the memory
      45             :   // will be released automatically.
      46             :   template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes);
      47             :   template <typename T>
      48             :   std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0);
      49             :   template <typename T> std::vector<T> ConsumeRemainingBytes();
      50             : 
      51             :   // Methods returning strings. Use only when you need a std::string or a null
      52             :   // terminated C-string. Otherwise, prefer the methods returning std::vector.
      53             :   std::string ConsumeBytesAsString(size_t num_bytes);
      54             :   std::string ConsumeRandomLengthString(size_t max_length);
      55             :   std::string ConsumeRandomLengthString();
      56             :   std::string ConsumeRemainingBytesAsString();
      57             : 
      58             :   // Methods returning integer values.
      59             :   template <typename T> T ConsumeIntegral();
      60             :   template <typename T> T ConsumeIntegralInRange(T min, T max);
      61             : 
      62             :   // Methods returning floating point values.
      63             :   template <typename T> T ConsumeFloatingPoint();
      64             :   template <typename T> T ConsumeFloatingPointInRange(T min, T max);
      65             : 
      66             :   // 0 <= return value <= 1.
      67             :   template <typename T> T ConsumeProbability();
      68             : 
      69             :   bool ConsumeBool();
      70             : 
      71             :   // Returns a value chosen from the given enum.
      72             :   template <typename T> T ConsumeEnum();
      73             : 
      74             :   // Returns a value from the given array.
      75             :   template <typename T, size_t size> T PickValueInArray(const T (&array)[size]);
      76             :   template <typename T, size_t size>
      77             :   T PickValueInArray(const std::array<T, size> &array);
      78             :   template <typename T> T PickValueInArray(std::initializer_list<const T> list);
      79             : 
      80             :   // Writes data to the given destination and returns number of bytes written.
      81             :   size_t ConsumeData(void *destination, size_t num_bytes);
      82             : 
      83             :   // Reports the remaining bytes available for fuzzed input.
      84           0 :   size_t remaining_bytes() { return remaining_bytes_; }
      85             : 
      86             :  private:
      87             :   FuzzedDataProvider(const FuzzedDataProvider &) = delete;
      88             :   FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
      89             : 
      90             :   void CopyAndAdvance(void *destination, size_t num_bytes);
      91             : 
      92             :   void Advance(size_t num_bytes);
      93             : 
      94             :   template <typename T>
      95             :   std::vector<T> ConsumeBytes(size_t size, size_t num_bytes);
      96             : 
      97             :   template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value);
      98             : 
      99             :   const uint8_t *data_ptr_;
     100             :   size_t remaining_bytes_;
     101             : };
     102             : 
     103             : // Returns a std::vector containing |num_bytes| of input data. If fewer than
     104             : // |num_bytes| of data remain, returns a shorter std::vector containing all
     105             : // of the data that's left. Can be used with any byte sized type, such as
     106             : // char, unsigned char, uint8_t, etc.
     107             : template <typename T>
     108       16200 : std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) {
     109       16200 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     110       16200 :   return ConsumeBytes<T>(num_bytes, num_bytes);
     111             : }
     112             : 
     113             : // Similar to |ConsumeBytes|, but also appends the terminator value at the end
     114             : // of the resulting vector. Useful, when a mutable null-terminated C-string is
     115             : // needed, for example. But that is a rare case. Better avoid it, if possible,
     116             : // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
     117             : template <typename T>
     118             : std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes,
     119             :                                                               T terminator) {
     120             :   num_bytes = std::min(num_bytes, remaining_bytes_);
     121             :   std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
     122             :   result.back() = terminator;
     123             :   return result;
     124             : }
     125             : 
     126             : // Returns a std::vector containing all remaining bytes of the input data.
     127             : template <typename T>
     128           0 : std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() {
     129           0 :   return ConsumeBytes<T>(remaining_bytes_);
     130             : }
     131             : 
     132             : // Returns a std::string containing |num_bytes| of input data. Using this and
     133             : // |.c_str()| on the resulting string is the best way to get an immutable
     134             : // null-terminated C string. If fewer than |num_bytes| of data remain, returns
     135             : // a shorter std::string containing all of the data that's left.
     136        2187 : inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) {
     137             :   static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
     138             :                 "ConsumeBytesAsString cannot convert the data to a string.");
     139             : 
     140        2187 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     141        4374 :   std::string result(
     142        2187 :       reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes);
     143        2187 :   Advance(num_bytes);
     144        2187 :   return result;
     145        2187 : }
     146             : 
     147             : // Returns a std::string of length from 0 to |max_length|. When it runs out of
     148             : // input data, returns what remains of the input. Designed to be more stable
     149             : // with respect to a fuzzer inserting characters than just picking a random
     150             : // length and then consuming that many bytes with |ConsumeBytes|.
     151             : inline std::string
     152       50462 : FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) {
     153             :   // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
     154             :   // followed by anything else to the end of the string. As a result of this
     155             :   // logic, a fuzzer can insert characters into the string, and the string
     156             :   // will be lengthened to include those new characters, resulting in a more
     157             :   // stable fuzzer than picking the length of a string independently from
     158             :   // picking its contents.
     159       50462 :   std::string result;
     160             : 
     161             :   // Reserve the anticipated capacity to prevent several reallocations.
     162       50462 :   result.reserve(std::min(max_length, remaining_bytes_));
     163     2206323 :   for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
     164     2197790 :     char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
     165     2197790 :     Advance(1);
     166     2197790 :     if (next == '\\' && remaining_bytes_ != 0) {
     167       46416 :       next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
     168       46416 :       Advance(1);
     169       46416 :       if (next != '\\')
     170       41929 :         break;
     171        4487 :     }
     172     2155861 :     result += next;
     173     2155861 :   }
     174             : 
     175       50462 :   result.shrink_to_fit();
     176       50462 :   return result;
     177       50462 : }
     178             : 
     179             : // Returns a std::string of length from 0 to |remaining_bytes_|.
     180       31902 : inline std::string FuzzedDataProvider::ConsumeRandomLengthString() {
     181       31902 :   return ConsumeRandomLengthString(remaining_bytes_);
     182             : }
     183             : 
     184             : // Returns a std::string containing all remaining bytes of the input data.
     185             : // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
     186             : // object.
     187        1998 : inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() {
     188        1998 :   return ConsumeBytesAsString(remaining_bytes_);
     189             : }
     190             : 
     191             : // Returns a number in the range [Type's min, Type's max]. The value might
     192             : // not be uniformly distributed in the given range. If there's no input data
     193             : // left, always returns |min|.
     194     2015643 : template <typename T> T FuzzedDataProvider::ConsumeIntegral() {
     195     4031286 :   return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
     196     2015643 :                                 std::numeric_limits<T>::max());
     197             : }
     198             : 
     199             : // Returns a number in the range [min, max] by consuming bytes from the
     200             : // input data. The value might not be uniformly distributed in the given
     201             : // range. If there's no input data left, always returns |min|. |min| must
     202             : // be less than or equal to |max|.
     203             : template <typename T>
     204     4574931 : T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) {
     205             :   static_assert(std::is_integral<T>::value, "An integral type is required.");
     206             :   static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
     207             : 
     208     4574931 :   if (min > max)
     209           0 :     abort();
     210             : 
     211             :   // Use the biggest type possible to hold the range and the result.
     212     4574931 :   uint64_t range = static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
     213     4574931 :   uint64_t result = 0;
     214     4574931 :   size_t offset = 0;
     215             : 
     216    12679292 :   while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
     217     5307267 :          remaining_bytes_ != 0) {
     218             :     // Pull bytes off the end of the seed data. Experimentally, this seems to
     219             :     // allow the fuzzer to more easily explore the input space. This makes
     220             :     // sense, since it works by modifying inputs that caused new code to run,
     221             :     // and this data is often used to encode length of data read by
     222             :     // |ConsumeBytes|. Separating out read lengths makes it easier to modify the
     223             :     // contents of the data that is actually read.
     224     3547785 :     --remaining_bytes_;
     225     3547785 :     result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
     226     3547785 :     offset += CHAR_BIT;
     227             :   }
     228             : 
     229             :   // Avoid division by 0, in case |range + 1| results in overflow.
     230     4574931 :   if (range != std::numeric_limits<decltype(range)>::max())
     231     4545679 :     result = result % (range + 1);
     232             : 
     233     4574931 :   return static_cast<T>(static_cast<uint64_t>(min) + result);
     234             : }
     235             : 
     236             : // Returns a floating point value in the range [Type's lowest, Type's max] by
     237             : // consuming bytes from the input data. If there's no input data left, always
     238             : // returns the Type's lowest value (|min| of the range, offset by zero).
     239           0 : template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() {
     240           0 :   return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
     241           0 :                                         std::numeric_limits<T>::max());
     242             : }
     243             : 
     244             : // Returns a floating point value in the given range by consuming bytes from
     245             : // the input data. If there's no input data left, returns |min|. Note that
     246             : // |min| must be less than or equal to |max|.
     247             : template <typename T>
     248           0 : T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) {
     249           0 :   if (min > max)
     250           0 :     abort();
     251             : 
     252           0 :   T range = .0;
     253           0 :   T result = min;
     254           0 :   constexpr T zero(.0);
     255           0 :   if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
     256             :     // The diff |max - min| would overflow the given floating point type. Use
     257             :     // the half of the diff as the range and consume a bool to decide whether
     258             :     // the result is in the first or the second part of the diff.
     259           0 :     range = (max / 2.0) - (min / 2.0);
     260           0 :     if (ConsumeBool()) {
     261           0 :       result += range;
     262           0 :     }
     263           0 :   } else {
     264           0 :     range = max - min;
     265             :   }
     266             : 
     267           0 :   return result + range * ConsumeProbability<T>();
     268             : }
     269             : 
     270             : // Returns a floating point number in the range [0.0, 1.0]. If there's no
     271             : // input data left, always returns 0.
     272           0 : template <typename T> T FuzzedDataProvider::ConsumeProbability() {
     273             :   static_assert(std::is_floating_point<T>::value,
     274             :                 "A floating point type is required.");
     275             : 
     276             :   // Use different integral types for different floating point types in order
     277             :   // to provide better density of the resulting values.
     278             :   using IntegralType =
     279             :       typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
     280             :                                 uint64_t>::type;
     281             : 
     282           0 :   T result = static_cast<T>(ConsumeIntegral<IntegralType>());
     283           0 :   result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
     284           0 :   return result;
     285             : }
     286             : 
     287             : // Reads one byte and returns a bool, or false when no data remains.
     288     1400182 : inline bool FuzzedDataProvider::ConsumeBool() {
     289     1400182 :   return 1 & ConsumeIntegral<uint8_t>();
     290             : }
     291             : 
     292             : // Returns an enum value. The enum must start at 0 and be contiguous. It must
     293             : // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
     294             : // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
     295             : template <typename T> T FuzzedDataProvider::ConsumeEnum() {
     296             :   static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
     297             :   return static_cast<T>(
     298             :       ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue)));
     299             : }
     300             : 
     301             : // Returns a copy of the value selected from the given fixed-size |array|.
     302             : template <typename T, size_t size>
     303           0 : T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) {
     304             :   static_assert(size > 0, "The array must be non empty.");
     305           0 :   return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
     306             : }
     307             : 
     308             : template <typename T, size_t size>
     309           0 : T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) {
     310             :   static_assert(size > 0, "The array must be non empty.");
     311           0 :   return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
     312             : }
     313             : 
     314             : template <typename T>
     315       75981 : T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) {
     316             :   // TODO(Dor1s): switch to static_assert once C++14 is allowed.
     317       75981 :   if (!list.size())
     318           0 :     abort();
     319             : 
     320       75981 :   return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
     321             : }
     322             : 
     323             : // Writes |num_bytes| of input data to the given destination pointer. If there
     324             : // is not enough data left, writes all remaining bytes. Return value is the
     325             : // number of bytes written.
     326             : // In general, it's better to avoid using this function, but it may be useful
     327             : // in cases when it's necessary to fill a certain buffer or object with
     328             : // fuzzing data.
     329           0 : inline size_t FuzzedDataProvider::ConsumeData(void *destination,
     330             :                                               size_t num_bytes) {
     331           0 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     332           0 :   CopyAndAdvance(destination, num_bytes);
     333           0 :   return num_bytes;
     334             : }
     335             : 
     336             : // Private methods.
     337       22693 : inline void FuzzedDataProvider::CopyAndAdvance(void *destination,
     338             :                                                size_t num_bytes) {
     339       22693 :   std::memcpy(destination, data_ptr_, num_bytes);
     340       22693 :   Advance(num_bytes);
     341       22693 : }
     342             : 
     343     2269086 : inline void FuzzedDataProvider::Advance(size_t num_bytes) {
     344     2269086 :   if (num_bytes > remaining_bytes_)
     345           0 :     abort();
     346             : 
     347     2269086 :   data_ptr_ += num_bytes;
     348     2269086 :   remaining_bytes_ -= num_bytes;
     349     2269086 : }
     350             : 
     351             : template <typename T>
     352       16200 : std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) {
     353             :   static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
     354             : 
     355             :   // The point of using the size-based constructor below is to increase the
     356             :   // odds of having a vector object with capacity being equal to the length.
     357             :   // That part is always implementation specific, but at least both libc++ and
     358             :   // libstdc++ allocate the requested number of bytes in that constructor,
     359             :   // which seems to be a natural choice for other implementations as well.
     360             :   // To increase the odds even more, we also call |shrink_to_fit| below.
     361       16200 :   std::vector<T> result(size);
     362       16200 :   if (size == 0) {
     363        1044 :     if (num_bytes != 0)
     364           0 :       abort();
     365        1044 :     return result;
     366             :   }
     367             : 
     368       15156 :   CopyAndAdvance(result.data(), num_bytes);
     369             : 
     370             :   // Even though |shrink_to_fit| is also implementation specific, we expect it
     371             :   // to provide an additional assurance in case vector's constructor allocated
     372             :   // a buffer which is larger than the actual amount of data we put inside it.
     373       15156 :   result.shrink_to_fit();
     374       15156 :   return result;
     375       16200 : }
     376             : 
     377             : template <typename TS, typename TU>
     378     2244206 : TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) {
     379             :   static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
     380             :   static_assert(!std::numeric_limits<TU>::is_signed,
     381             :                 "Source type must be unsigned.");
     382             : 
     383             :   // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
     384             :   if (std::numeric_limits<TS>::is_modulo)
     385             :     return static_cast<TS>(value);
     386             : 
     387             :   // Avoid using implementation-defined unsigned to signed conversions.
     388             :   // To learn more, see https://stackoverflow.com/questions/13150449.
     389     2244206 :   if (value <= std::numeric_limits<TS>::max()) {
     390     1609236 :     return static_cast<TS>(value);
     391             :   } else {
     392      634970 :     constexpr auto TS_min = std::numeric_limits<TS>::min();
     393      634970 :     return TS_min + static_cast<TS>(value - TS_min);
     394             :   }
     395     2244206 : }
     396             : 
     397             : #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_

Generated by: LCOV version 1.14