|
| 1 | +// |
| 2 | +// TextLogDecoder.hh |
| 3 | +// |
| 4 | +// Copyright 2025-Present Couchbase, Inc. |
| 5 | +// |
| 6 | +// Use of this software is governed by the Business Source License included |
| 7 | +// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified |
| 8 | +// in that file, in accordance with the Business Source License, use of this |
| 9 | +// software will be governed by the Apache License, Version 2.0, included in |
| 10 | +// the file licenses/APL2.txt. |
| 11 | +// |
| 12 | + |
| 13 | +#pragma once |
#include "LogDecoder.hh"
#include "ParseDate.hh"
#include <algorithm>
#include <cctype>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <istream>
#include <iterator>
#include <optional>
#include <regex>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <utility>
| 19 | + |
| 20 | +namespace litecore { |
| 21 | + |
| 22 | + /** Parses LiteCore-generated textual log files. */ |
| 23 | + class TextLogDecoder : public LogIterator { |
| 24 | + public: |
| 25 | + /** Returns true if the stream `in` appears to contain textual log data. */ |
| 26 | + static bool looksTextual(std::istream& in) { |
| 27 | + auto pos = in.tellg(); |
| 28 | + char chars[27] = {}; |
| 29 | + bool result = true; |
| 30 | + in.read((char*)chars, sizeof(chars)); |
| 31 | + in.seekg(pos); |
| 32 | + return looksLikeLogLine(std::string_view(chars, std::size(chars))); |
| 33 | + } |
| 34 | + |
| 35 | + /** Initializes decoder with a stream written by LiteCore's textual log encoder. */ |
| 36 | + explicit TextLogDecoder(std::istream& in) : _in(in) { |
| 37 | + _in.exceptions(std::istream::badbit); |
| 38 | + if ( next() && next() ) // Read header line to get the initial timestamp |
| 39 | + _startTime = _curTimestamp; |
| 40 | + } |
| 41 | + |
| 42 | + bool next() override { |
| 43 | + if ( _line.empty() ) { |
| 44 | + // Read next line if there's not one in the buffer: |
| 45 | + if ( !_in || _in.peek() < 0 ) return false; |
| 46 | + std::getline(_in, _line); |
| 47 | + if ( _line.empty() ) return false; |
| 48 | + } |
| 49 | + |
| 50 | + // Example: 2025-12-09T06:47:55.507699Z WS Verbose Obj=/JRepl@1175308903/…/ Received 58-byte message |
| 51 | + |
| 52 | + std::string_view rest(_line); |
| 53 | + auto nextColumn = [&] { |
| 54 | + auto next = rest.find(' '); |
| 55 | + std::string_view column = rest.substr(0, next); |
| 56 | + rest = rest.substr(next + 1); |
| 57 | + return column; |
| 58 | + }; |
| 59 | + |
| 60 | + auto timestamp = nextColumn(); |
| 61 | + auto micros = std::stoul(std::string(timestamp.substr(timestamp.size() - 7, 6))); |
| 62 | + auto millis = fleece::ParseISO8601Date(timestamp); |
| 63 | + if ( millis == kInvalidDate || millis < 0x19000000 || micros > 999999 ) |
| 64 | + throw std::runtime_error("Could not parse timestamp in log line: " + _line); |
| 65 | + _curTimestamp = {millis / 1000, unsigned(micros)}; |
| 66 | + |
| 67 | + _curDomain = nextColumn(); |
| 68 | + |
| 69 | + auto levelStr = nextColumn(); |
| 70 | + if ( auto i = std::ranges::find(kLevelNames, levelStr); i != std::end(kLevelNames) ) |
| 71 | + _curLevel = i - std::begin(kLevelNames); |
| 72 | + else |
| 73 | + _curLevel = 0; |
| 74 | + |
| 75 | + _curObject.clear(); |
| 76 | + _curObjectID = 0; |
| 77 | + if ( rest.starts_with("Obj=/") ) { |
| 78 | + std::string_view obj = nextColumn(); |
| 79 | + if ( auto size = obj.size(); size >= 13 && obj.ends_with('/') ) { |
| 80 | + if ( auto pos = obj.rfind('#'); pos != std::string::npos ) { |
| 81 | + _curObject = obj.substr(5, size - 6); // trim 'Obj=/' and '/ ' |
| 82 | + _curObjectID = std::stoul(std::string(obj.substr(pos + 1, size - 2 - pos))); |
| 83 | + } |
| 84 | + } |
| 85 | + } |
| 86 | + |
| 87 | + _curMessage = rest; |
| 88 | + |
| 89 | + // Add any following non-log-format lines to the message: |
| 90 | + _line.clear(); |
| 91 | + while ( _in && _in.peek() >= 0 ) { |
| 92 | + std::getline(_in, _line); |
| 93 | + if ( _line.empty() ) break; |
| 94 | + if ( looksLikeLogLine(_line) ) break; |
| 95 | + _curMessage += '\n'; |
| 96 | + _curMessage += _line; |
| 97 | + _line.clear(); |
| 98 | + } |
| 99 | + |
| 100 | + return true; |
| 101 | + } |
| 102 | + |
| 103 | + Timestamp startTime() const override { return _startTime; } |
| 104 | + |
| 105 | + Timestamp timestamp() const override { return _curTimestamp; } |
| 106 | + |
| 107 | + int8_t level() const override { return _curLevel; } |
| 108 | + |
| 109 | + const std::string& domain() const override { return _curDomain; } |
| 110 | + |
| 111 | + uint64_t objectID() const override { return _curObjectID; } |
| 112 | + |
| 113 | + const std::string* objectDescription() const override { return &_curObject; } |
| 114 | + |
| 115 | + void decodeMessageTo(std::ostream& out) override { out << _curMessage; } |
| 116 | + |
| 117 | + private: |
| 118 | + static bool looksLikeLogLine(std::string_view line) { |
| 119 | + if ( line.size() < 27 ) return false; |
| 120 | + for ( uint8_t c : line.substr(0, 27) ) { |
| 121 | + if ( !isdigit(c) && c != '-' && c != ':' && c != '.' && c != 'Z' && c != 'T' ) return false; |
| 122 | + } |
| 123 | + return true; |
| 124 | + } |
| 125 | + |
| 126 | + static constexpr std::string_view kLevelNames[] = {"Debug", "Verbose", "Info", "WARNING", "ERROR"}; |
| 127 | + |
| 128 | + std::istream& _in; |
| 129 | + Timestamp _startTime{}; |
| 130 | + std::string _line; |
| 131 | + |
| 132 | + Timestamp _curTimestamp; |
| 133 | + int8_t _curLevel; |
| 134 | + std::string _curDomain; |
| 135 | + std::string _curObject; |
| 136 | + uint64_t _curObjectID; |
| 137 | + std::string _curMessage; |
| 138 | + }; |
| 139 | + |
| 140 | +} // namespace litecore |
0 commit comments