/bitcoin/src/leveldb/db/log_reader.cc
Line | Count | Source |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | |
5 | | #include "db/log_reader.h" |
6 | | |
7 | | #include <stdio.h> |
8 | | |
9 | | #include "leveldb/env.h" |
10 | | #include "util/coding.h" |
11 | | #include "util/crc32c.h" |
12 | | |
13 | | namespace leveldb { |
14 | | namespace log { |
15 | | |
16 | 33.2k | Reader::Reporter::~Reporter() = default; |
17 | | |
18 | | Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, |
19 | | uint64_t initial_offset) |
20 | 33.2k | : file_(file), |
21 | 33.2k | reporter_(reporter), |
22 | 33.2k | checksum_(checksum), |
23 | 33.2k | backing_store_(new char[kBlockSize]), |
24 | 33.2k | buffer_(), |
25 | 33.2k | eof_(false), |
26 | 33.2k | last_record_offset_(0), |
27 | 33.2k | end_of_buffer_offset_(0), |
28 | 33.2k | initial_offset_(initial_offset), |
29 | 33.2k | resyncing_(initial_offset > 0) {} |
30 | | |
31 | 33.2k | Reader::~Reader() { delete[] backing_store_; } |
32 | | |
33 | 0 | bool Reader::SkipToInitialBlock() { |
34 | 0 | const size_t offset_in_block = initial_offset_ % kBlockSize; |
35 | 0 | uint64_t block_start_location = initial_offset_ - offset_in_block; |
36 | | |
37 | | // Don't search a block if we'd be in the trailer |
38 | 0 | if (offset_in_block > kBlockSize - 6) { Branch (38:7): [True: 0, False: 0]
39 | 0 | block_start_location += kBlockSize; |
40 | 0 | } |
41 | |
42 | 0 | end_of_buffer_offset_ = block_start_location; |
43 | | |
44 | | // Skip to start of first block that can contain the initial record |
45 | 0 | if (block_start_location > 0) { Branch (45:7): [True: 0, False: 0]
46 | 0 | Status skip_status = file_->Skip(block_start_location); |
47 | 0 | if (!skip_status.ok()) { Branch (47:9): [True: 0, False: 0]
48 | 0 | ReportDrop(block_start_location, skip_status); |
49 | 0 | return false; |
50 | 0 | } |
51 | 0 | } |
52 | | |
53 | 0 | return true; |
54 | 0 | } |
55 | | |
56 | 66.5k | bool Reader::ReadRecord(Slice* record, std::string* scratch) { |
57 | 66.5k | if (last_record_offset_ < initial_offset_) { Branch (57:7): [True: 0, False: 66.5k]
58 | 0 | if (!SkipToInitialBlock()) { Branch (58:9): [True: 0, False: 0]
59 | 0 | return false; |
60 | 0 | } |
61 | 0 | } |
62 | | |
63 | 66.5k | scratch->clear(); |
64 | 66.5k | record->clear(); |
65 | 66.5k | bool in_fragmented_record = false; |
66 | | // Record offset of the logical record that we're reading |
67 | | // 0 is a dummy value to make compilers happy |
68 | 66.5k | uint64_t prospective_record_offset = 0; |
69 | | |
70 | 66.5k | Slice fragment; |
71 | 66.5k | while (true) { Branch (71:10): [Folded - Ignored]
72 | 66.5k | const unsigned int record_type = ReadPhysicalRecord(&fragment); |
73 | | |
74 | | // ReadPhysicalRecord may have only had an empty trailer remaining in its |
75 | | // internal buffer. Calculate the offset of the next physical record now |
76 | | // that it has returned, properly accounting for its header size. |
77 | 66.5k | uint64_t physical_record_offset = |
78 | 66.5k | end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size(); |
79 | | |
80 | 66.5k | if (resyncing_) { Branch (80:9): [True: 0, False: 66.5k]
81 | 0 | if (record_type == kMiddleType) { Branch (81:11): [True: 0, False: 0]
82 | 0 | continue; |
83 | 0 | } else if (record_type == kLastType) { Branch (83:18): [True: 0, False: 0]
84 | 0 | resyncing_ = false; |
85 | 0 | continue; |
86 | 0 | } else { |
87 | 0 | resyncing_ = false; |
88 | 0 | } |
89 | 0 | } |
90 | | |
91 | 66.5k | switch (record_type) { |
92 | 33.2k | case kFullType: Branch (92:7): [True: 33.2k, False: 33.2k]
93 | 33.2k | if (in_fragmented_record) { Branch (93:13): [True: 0, False: 33.2k]
94 | | // Handle bug in earlier versions of log::Writer where |
95 | | // it could emit an empty kFirstType record at the tail end |
96 | | // of a block followed by a kFullType or kFirstType record |
97 | | // at the beginning of the next block. |
98 | 0 | if (!scratch->empty()) { Branch (98:15): [True: 0, False: 0]
99 | 0 | ReportCorruption(scratch->size(), "partial record without end(1)"); |
100 | 0 | } |
101 | 0 | } |
102 | 33.2k | prospective_record_offset = physical_record_offset; |
103 | 33.2k | scratch->clear(); |
104 | 33.2k | *record = fragment; |
105 | 33.2k | last_record_offset_ = prospective_record_offset; |
106 | 33.2k | return true; |
107 | | |
108 | 0 | case kFirstType: Branch (108:7): [True: 0, False: 66.5k]
109 | 0 | if (in_fragmented_record) { Branch (109:13): [True: 0, False: 0]
110 | | // Handle bug in earlier versions of log::Writer where |
111 | | // it could emit an empty kFirstType record at the tail end |
112 | | // of a block followed by a kFullType or kFirstType record |
113 | | // at the beginning of the next block. |
114 | 0 | if (!scratch->empty()) { Branch (114:15): [True: 0, False: 0]
115 | 0 | ReportCorruption(scratch->size(), "partial record without end(2)"); |
116 | 0 | } |
117 | 0 | } |
118 | 0 | prospective_record_offset = physical_record_offset; |
119 | 0 | scratch->assign(fragment.data(), fragment.size()); |
120 | 0 | in_fragmented_record = true; |
121 | 0 | break; |
122 | | |
123 | 0 | case kMiddleType: Branch (123:7): [True: 0, False: 66.5k]
124 | 0 | if (!in_fragmented_record) { Branch (124:13): [True: 0, False: 0]
125 | 0 | ReportCorruption(fragment.size(), |
126 | 0 | "missing start of fragmented record(1)"); |
127 | 0 | } else { |
128 | 0 | scratch->append(fragment.data(), fragment.size()); |
129 | 0 | } |
130 | 0 | break; |
131 | | |
132 | 0 | case kLastType: Branch (132:7): [True: 0, False: 66.5k]
133 | 0 | if (!in_fragmented_record) { Branch (133:13): [True: 0, False: 0]
134 | 0 | ReportCorruption(fragment.size(), |
135 | 0 | "missing start of fragmented record(2)"); |
136 | 0 | } else { |
137 | 0 | scratch->append(fragment.data(), fragment.size()); |
138 | 0 | *record = Slice(*scratch); |
139 | 0 | last_record_offset_ = prospective_record_offset; |
140 | 0 | return true; |
141 | 0 | } |
142 | 0 | break; |
143 | | |
144 | 33.2k | case kEof: Branch (144:7): [True: 33.2k, False: 33.2k]
145 | 33.2k | if (in_fragmented_record) { Branch (145:13): [True: 0, False: 33.2k]
146 | | // This can be caused by the writer dying immediately after |
147 | | // writing a physical record but before completing the next; don't |
148 | | // treat it as a corruption, just ignore the entire logical record. |
149 | 0 | scratch->clear(); |
150 | 0 | } |
151 | 33.2k | return false; |
152 | | |
153 | 0 | case kBadRecord: Branch (153:7): [True: 0, False: 66.5k]
154 | 0 | if (in_fragmented_record) { Branch (154:13): [True: 0, False: 0]
155 | 0 | ReportCorruption(scratch->size(), "error in middle of record"); |
156 | 0 | in_fragmented_record = false; |
157 | 0 | scratch->clear(); |
158 | 0 | } |
159 | 0 | break; |
160 | | |
161 | 0 | default: { Branch (161:7): [True: 0, False: 66.5k]
162 | 0 | char buf[40]; |
163 | 0 | snprintf(buf, sizeof(buf), "unknown record type %u", record_type); |
164 | 0 | ReportCorruption( |
165 | 0 | (fragment.size() + (in_fragmented_record ? scratch->size() : 0)), Branch (165:33): [True: 0, False: 0]
166 | 0 | buf); |
167 | 0 | in_fragmented_record = false; |
168 | 0 | scratch->clear(); |
169 | 0 | break; |
170 | 0 | } |
171 | 66.5k | } |
172 | 66.5k | } |
173 | 0 | return false; |
174 | 66.5k | } |
175 | | |
176 | 0 | uint64_t Reader::LastRecordOffset() { return last_record_offset_; } |
177 | | |
178 | 0 | void Reader::ReportCorruption(uint64_t bytes, const char* reason) { |
179 | 0 | ReportDrop(bytes, Status::Corruption(reason, file_->GetName())); |
180 | 0 | } |
181 | | |
182 | 0 | void Reader::ReportDrop(uint64_t bytes, const Status& reason) { |
183 | 0 | if (reporter_ != nullptr && Branch (183:7): [True: 0, False: 0]
184 | 0 | end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) { Branch (184:7): [True: 0, False: 0]
185 | 0 | reporter_->Corruption(static_cast<size_t>(bytes), reason); |
186 | 0 | } |
187 | 0 | } |
188 | | |
189 | 66.5k | unsigned int Reader::ReadPhysicalRecord(Slice* result) { |
190 | 99.8k | while (true) { Branch (190:10): [Folded - Ignored]
191 | 99.8k | if (buffer_.size() < kHeaderSize) { Branch (191:9): [True: 66.5k, False: 33.2k]
192 | 66.5k | if (!eof_) { Branch (192:11): [True: 33.2k, False: 33.2k]
193 | | // Last read was a full read, so this is a trailer to skip |
194 | 33.2k | buffer_.clear(); |
195 | 33.2k | Status status = file_->Read(kBlockSize, &buffer_, backing_store_); |
196 | 33.2k | end_of_buffer_offset_ += buffer_.size(); |
197 | 33.2k | if (!status.ok()) { Branch (197:13): [True: 0, False: 33.2k]
198 | 0 | buffer_.clear(); |
199 | 0 | ReportDrop(kBlockSize, status); |
200 | 0 | eof_ = true; |
201 | 0 | return kEof; |
202 | 33.2k | } else if (buffer_.size() < kBlockSize) { Branch (202:20): [True: 33.2k, False: 0]
203 | 33.2k | eof_ = true; |
204 | 33.2k | } |
205 | 33.2k | continue; |
206 | 33.2k | } else { |
207 | | // Note that if buffer_ is non-empty, we have a truncated header at the |
208 | | // end of the file, which can be caused by the writer crashing in the |
209 | | // middle of writing the header. Instead of considering this an error, |
210 | | // just report EOF. |
211 | 33.2k | buffer_.clear(); |
212 | 33.2k | return kEof; |
213 | 33.2k | } |
214 | 66.5k | } |
215 | | |
216 | | // Parse the header |
217 | 33.2k | const char* header = buffer_.data(); |
218 | 33.2k | const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff; |
219 | 33.2k | const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff; |
220 | 33.2k | const unsigned int type = header[6]; |
221 | 33.2k | const uint32_t length = a | (b << 8); |
222 | 33.2k | if (kHeaderSize + length > buffer_.size()) { Branch (222:9): [True: 0, False: 33.2k]
223 | 0 | size_t drop_size = buffer_.size(); |
224 | 0 | buffer_.clear(); |
225 | 0 | if (!eof_) { Branch (225:11): [True: 0, False: 0]
226 | 0 | ReportCorruption(drop_size, "bad record length"); |
227 | 0 | return kBadRecord; |
228 | 0 | } |
229 | | // If the end of the file has been reached without reading |length| bytes |
230 | | // of payload, assume the writer died in the middle of writing the record. |
231 | | // Don't report a corruption. |
232 | 0 | return kEof; |
233 | 0 | } |
234 | | |
235 | 33.2k | if (type == kZeroType && length == 0) { Branch (235:9): [True: 0, False: 33.2k] Branch (235:30): [True: 0, False: 0]
236 | | // Skip zero length record without reporting any drops since |
237 | | // such records are produced by the mmap based writing code in |
238 | | // env_posix.cc that preallocates file regions. |
239 | 0 | buffer_.clear(); |
240 | 0 | return kBadRecord; |
241 | 0 | } |
242 | | |
243 | | // Check crc |
244 | 33.2k | if (checksum_) { Branch (244:9): [True: 33.2k, False: 0]
245 | 33.2k | uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); |
246 | 33.2k | uint32_t actual_crc = crc32c::Value(header + 6, 1 + length); |
247 | 33.2k | if (actual_crc != expected_crc) { Branch (247:11): [True: 0, False: 33.2k]
248 | | // Drop the rest of the buffer since "length" itself may have |
249 | | // been corrupted and if we trust it, we could find some |
250 | | // fragment of a real log record that just happens to look |
251 | | // like a valid log record. |
252 | 0 | size_t drop_size = buffer_.size(); |
253 | 0 | buffer_.clear(); |
254 | 0 | ReportCorruption(drop_size, "checksum mismatch"); |
255 | 0 | return kBadRecord; |
256 | 0 | } |
257 | 33.2k | } |
258 | | |
259 | 33.2k | buffer_.remove_prefix(kHeaderSize + length); |
260 | | |
261 | | // Skip physical record that started before initial_offset_ |
262 | 33.2k | if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length < Branch (262:9): [True: 0, False: 33.2k]
263 | 33.2k | initial_offset_) { |
264 | 0 | result->clear(); |
265 | 0 | return kBadRecord; |
266 | 0 | } |
267 | | |
268 | 33.2k | *result = Slice(header + kHeaderSize, length); |
269 | 33.2k | return type; |
270 | 33.2k | } |
271 | 66.5k | } |
272 | | |
273 | | } // namespace log |
274 | | } // namespace leveldb |
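Note on the record format exercised above: the header decode at source lines 217-221 and the checksum check at lines 244-247 both depend on the physical record layout from db/log_format.h, where kHeaderSize is a 7-byte header (4-byte masked CRC + 2-byte little-endian length + 1-byte type) followed by the payload, and records are packed into 32 KiB blocks (kBlockSize). The sketch below is illustrative only and not part of the listed file; DecodePhysicalHeader is a hypothetical helper, and it assumes compilation inside the leveldb source tree so the same util/ headers are available.

// Illustrative sketch: decode and verify one physical record header the way
// Reader::ReadPhysicalRecord does. |header| must point into a buffer that
// still holds the full record (type byte plus payload), since the CRC covers
// header[6] and the |length| payload bytes that follow it.
#include <cstdint>

#include "util/coding.h"  // leveldb::DecodeFixed32
#include "util/crc32c.h"  // leveldb::crc32c::Unmask, leveldb::crc32c::Value

inline bool DecodePhysicalHeader(const char* header, uint32_t* length,
                                 unsigned int* type) {
  const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;  // low length byte
  const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;  // high length byte
  *length = a | (b << 8);
  *type = header[6];
  // header[0..3] stores the masked CRC of the type byte plus the payload.
  const uint32_t expected_crc =
      leveldb::crc32c::Unmask(leveldb::DecodeFixed32(header));
  const uint32_t actual_crc =
      leveldb::crc32c::Value(header + 6, 1 + *length);
  return actual_crc == expected_crc;
}

In the real reader this logic is folded into ReadPhysicalRecord, which additionally handles block refills, truncated trailers at EOF, and the zero-length padding records skipped at source lines 235-241.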