diff --git a/.gitignore b/.gitignore index d27277565..fcd5f2b6b 100644 --- a/.gitignore +++ b/.gitignore @@ -47,8 +47,19 @@ build/* cpp/third_party/zlib-1.3.1/treebuild.xml cpp/third_party/zlib-1.3.1/zlib-1.3.1/treebuild.xml -# Claude Code -.claude/settings.local.json -.claude/todos/ -.claude/worktrees/ -.claude/scheduled_tasks.json +# Claude Code (local AI tooling — not uploaded; skill lives in cpp/tools/skills) +.claude/ + +# CodeGraph local index +.codegraph/ + +# Test-run artifacts (temp .tsfile/.dat written to the working dir or repo root) +cpp/cwrapper_*.tsfile +cpp/tsfile_writer_*.tsfile +cpp/*.dat +/*.tsfile +/*.dat + +# AI workflow artifacts (kept local, not uploaded) +docs/superpowers/ +/QA_Log.md diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 98d93fcfe..ea7e165d1 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -190,6 +190,9 @@ endif () option(BUILD_TEST "Build tests" ON) message("cmake using: BUILD_TEST=${BUILD_TEST}") +option(BUILD_TOOLS "Build the tsfile command-line tools" ON) +message("cmake using: BUILD_TOOLS=${BUILD_TOOLS}") + option(ENABLE_ANTLR4 "Enable ANTLR4 runtime" ON) message("cmake using: ENABLE_ANTLR4=${ENABLE_ANTLR4}") @@ -262,6 +265,9 @@ endif () add_subdirectory(third_party) add_subdirectory(src) +if (BUILD_TOOLS) + add_subdirectory(tools) +endif () if (BUILD_TEST) add_subdirectory(test) if (TESTS_ENABLED) @@ -272,4 +278,3 @@ else() endif () add_subdirectory(examples) - diff --git a/cpp/src/file/read_file.cc b/cpp/src/file/read_file.cc index dd1c42dad..8aab78ca6 100644 --- a/cpp/src/file/read_file.cc +++ b/cpp/src/file/read_file.cc @@ -21,6 +21,8 @@ #include #include + +#include #ifdef _WIN32 #include #include @@ -49,10 +51,8 @@ int ReadFile::open(const std::string& file_path) { file_path_ = file_path; fd_ = ::open(file_path_.c_str(), O_RDONLY); if (fd_ < 0) { - std::cout << "open file " << file_path << " error :" << fd_ - << std::endl; - std::cout << "open error" << errno << " " << strerror(errno) - << 
std::endl; + std::cerr << "open file " << file_path << " error: " << strerror(errno) + << " (errno " << errno << ")" << std::endl; return E_FILE_OPEN_ERR; } diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 02c288167..4d325635f 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -110,6 +110,33 @@ if (${DOWNLOADED}) endif () add_subdirectory("${GTEST_SRC_ROOT}" "${CMAKE_BINARY_DIR}/googletest-build" EXCLUDE_FROM_ALL) + # AppleClang searches /usr/local/include before CMake's generated -isystem + # paths, so a system-installed GTest can shadow the vendored headers. Force + # the vendored include dirs ahead for GTest's own sources here; the same is + # done for the consuming TsFile_Test target below (where header resolution + # actually matters for the test code). + foreach (GTEST_TARGET gtest gtest_main gmock gmock_main) + if (TARGET ${GTEST_TARGET}) + target_include_directories(${GTEST_TARGET} BEFORE PRIVATE + ${GTEST_SRC_ROOT}/googletest/include + ${GTEST_SRC_ROOT}/googletest + ${GTEST_SRC_ROOT}/googlemock/include + ${GTEST_SRC_ROOT}/googlemock) + if (APPLE AND NOT MSVC) + target_compile_options(${GTEST_TARGET} BEFORE PRIVATE + -iquote${GTEST_SRC_ROOT}/googletest/include + -iquote${GTEST_SRC_ROOT}/googletest + -I${GTEST_SRC_ROOT}/googletest/include + -I${GTEST_SRC_ROOT}/googletest + -std=c++14) + endif () + endif () + endforeach () + # Remember the vendored GTest header roots so they can be forced ahead of any + # system installation when compiling TsFile_Test itself. 
+ set(VENDORED_GTEST_INCLUDE_DIRS + ${GTEST_SRC_ROOT}/googletest/include + ${GTEST_SRC_ROOT}/googlemock/include) set(TESTS_ENABLED ON PARENT_SCOPE) else () message(WARNING "Failed to download googletest from all provided URLs, setting TESTS_ENABLED to OFF") @@ -186,6 +213,11 @@ if (ENABLE_ZLIB) list(APPEND TEST_SRCS ${ZLIB_TEST_SRCS}) endif() +if (BUILD_TOOLS) + file(GLOB_RECURSE TOOLS_TEST_SRCS "tools/*_test.cc") + list(APPEND TEST_SRCS ${TOOLS_TEST_SRCS}) +endif () + if (${COV_ENABLED}) message("Enable code cov...") add_compile_options(-fprofile-arcs -ftest-coverage) @@ -197,12 +229,27 @@ if (ENABLE_ANTLR4) endif() add_executable(TsFile_Test ${TEST_SRCS}) +# Force the vendored GTest headers ahead of any system installation so the test +# code reliably compiles against the vendored 1.12.1 headers. +if (VENDORED_GTEST_INCLUDE_DIRS) + target_include_directories(TsFile_Test BEFORE PRIVATE + ${VENDORED_GTEST_INCLUDE_DIRS}) +endif () +if (BUILD_TOOLS) + target_include_directories(TsFile_Test PRIVATE ${CMAKE_SOURCE_DIR}/tools) +endif () +if (APPLE AND NOT MSVC) + target_compile_options(TsFile_Test PRIVATE -std=c++14) +endif () target_link_libraries( TsFile_Test GTest::gtest_main GTest::gmock tsfile ) +if (BUILD_TOOLS) + target_link_libraries(TsFile_Test tsfile_cli_obj) +endif () set_target_properties(TsFile_Test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${LIB_TSFILE_SDK_DIR}) @@ -232,4 +279,4 @@ if(WIN32) gtest_discover_tests(TsFile_Test DISCOVERY_MODE PRE_TEST DISCOVERY_TIMEOUT 120) else() gtest_discover_tests(TsFile_Test) -endif() \ No newline at end of file +endif() diff --git a/cpp/test/tools/cli_args_test.cc b/cpp/test/tools/cli_args_test.cc new file mode 100644 index 000000000..614329463 --- /dev/null +++ b/cpp/test/tools/cli_args_test.cc @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "cli/cli_args.h" + +#include + +#include + +#include "cli/run_cli.h" + +TEST(RunCliTest, VersionFlagPrintsVersionAndReturnsOk) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"--version"}, out, err); + EXPECT_EQ(code, 0); + EXPECT_NE(out.str().find("tsfile"), std::string::npos); + EXPECT_TRUE(err.str().empty()); +} + +TEST(RunCliTest, NoArgsPrintsUsageToErrAndReturnsUsageError) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("Usage"), std::string::npos); +} + +TEST(RunCliTest, UnknownCommandIsUsageError) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"frobnicate", "x.tsfile"}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("Unknown command"), std::string::npos); +} + +TEST(RunCliTest, LeadingOptionBeforeCommandIsClearError) { + std::ostringstream out; + std::ostringstream err; + int code = + tsfile_cli::run_cli({"-f", "json", "meta", "data.tsfile"}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("command must come before options"), + std::string::npos) + << err.str(); +} + +TEST(ParseArgsTest, CommandAndFilePositional) { + auto p = 
tsfile_cli::parse_args({"ls", "data.tsfile"}); + EXPECT_TRUE(p.error.empty()); + EXPECT_EQ(p.command, "ls"); + EXPECT_EQ(p.file, "data.tsfile"); +} + +TEST(ParseArgsTest, FormatFlagParsed) { + auto p = tsfile_cli::parse_args({"cat", "-f", "json", "data.tsfile"}); + EXPECT_TRUE(p.error.empty()); + EXPECT_EQ(p.format, tsfile_cli::ParsedArgs::Format::kJson); +} + +TEST(ParseArgsTest, MeasurementsSplitOnComma) { + auto p = tsfile_cli::parse_args({"cat", "-m", "s1,s2,s3", "data.tsfile"}); + ASSERT_EQ(p.measurements.size(), 3u); + EXPECT_EQ(p.measurements[1], "s2"); +} + +TEST(ParseArgsTest, LimitOffsetAndTimeRange) { + auto p = + tsfile_cli::parse_args({"head", "-n", "5", "--offset", "2", "--start", + "100", "--end", "200", "data.tsfile"}); + EXPECT_EQ(p.limit, 5); + EXPECT_EQ(p.offset, 2); + EXPECT_TRUE(p.has_start); + EXPECT_EQ(p.start, 100); + EXPECT_TRUE(p.has_end); + EXPECT_EQ(p.end, 200); +} + +TEST(ParseArgsTest, UnknownFlagIsError) { + auto p = tsfile_cli::parse_args({"ls", "--bogus", "data.tsfile"}); + EXPECT_FALSE(p.error.empty()); +} + +TEST(ParseArgsTest, BadFormatValueIsError) { + auto p = tsfile_cli::parse_args({"cat", "-f", "yaml", "data.tsfile"}); + EXPECT_FALSE(p.error.empty()); +} + +TEST(ParseArgsTest, MissingFileIsAllowedAtParseTime) { + auto p = tsfile_cli::parse_args({"ls"}); + EXPECT_TRUE(p.error.empty()); + EXPECT_EQ(p.command, "ls"); + EXPECT_TRUE(p.file.empty()); +} + +TEST(ParseArgsTest, WriteFlagsParsed) { + auto p = tsfile_cli::parse_args({"write", "--table", "t1", "--columns", + "s1:INT64:field", "-o", "out.tsfile", "-v", + "--header-match", "in.csv"}); + EXPECT_TRUE(p.error.empty()); + EXPECT_EQ(p.command, "write"); + EXPECT_EQ(p.table, "t1"); + EXPECT_EQ(p.columns, "s1:INT64:field"); + EXPECT_EQ(p.output, "out.tsfile"); + EXPECT_TRUE(p.verbose); + EXPECT_TRUE(p.header_match); + EXPECT_EQ(p.file, "in.csv"); +} + +TEST(ParseArgsTest, OutputFlagNeedsValue) { + auto p = tsfile_cli::parse_args({"write", "-o"}); + EXPECT_FALSE(p.error.empty()); 
+} + +TEST(ParseArgsTest, DashIsStdinPositional) { + auto p = + tsfile_cli::parse_args({"write", "--table", "t1", "--columns", + "s1:INT64:field", "-o", "out.tsfile", "-"}); + EXPECT_TRUE(p.error.empty()); + EXPECT_EQ(p.file, "-"); +} + +TEST(ParseArgsTest, SeedFlagParsed) { + auto p = tsfile_cli::parse_args( + {"sample", "-m", "s1", "-n", "3", "--seed", "42", "data.tsfile"}); + EXPECT_TRUE(p.error.empty()); + EXPECT_EQ(p.command, "sample"); + EXPECT_EQ(p.limit, 3); + EXPECT_TRUE(p.has_seed); + EXPECT_EQ(p.seed, 42); +} + +TEST(ParseArgsTest, BadSeedValueIsError) { + auto p = tsfile_cli::parse_args( + {"sample", "--seed", "not_a_number", "data.tsfile"}); + EXPECT_FALSE(p.error.empty()); + EXPECT_NE(p.error.find("Invalid --seed"), std::string::npos); +} + +TEST(RunCliTest, SelectIsNoLongerKnownCommand) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"select", "x.tsfile"}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("Unknown command"), std::string::npos); +} + +TEST(RunCliTest, SeedOnCatIsUsageError) { + std::ostringstream out; + std::ostringstream err; + int code = + tsfile_cli::run_cli({"cat", "--seed", "7", "x.tsfile"}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("--seed is only valid for sample"), + std::string::npos); +} + +TEST(RunCliTest, OffsetOnSampleIsUsageError) { + std::ostringstream out; + std::ostringstream err; + int code = + tsfile_cli::run_cli({"sample", "--offset", "2", "x.tsfile"}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("--offset is not valid for sample"), + std::string::npos); +} diff --git a/cpp/test/tools/cli_test_util.h b/cpp/test/tools/cli_test_util.h new file mode 100644 index 000000000..0d1dccb56 --- /dev/null +++ b/cpp/test/tools/cli_test_util.h @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TSFILE_CLI_TEST_UTIL_H +#define TSFILE_CLI_TEST_UTIL_H + +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +#include +#include + +#include "common/schema.h" +#include "common/tablet.h" +#include "file/write_file.h" +#include "writer/tsfile_table_writer.h" + +namespace tsfile_cli_test { + +// Unique per-process path so tests stay isolated when ctest runs the +// gtest-discovered cases in parallel processes. 
+inline std::string unique_temp_path(const std::string& stem, + const std::string& ext) { + static unsigned counter = 0; +#ifdef _WIN32 + long pid = static_cast(_getpid()); +#else + long pid = static_cast(getpid()); +#endif + std::ostringstream ss; + ss << stem << "_" << pid << "_" << counter++ << ext; + return ss.str(); +} + +inline std::string write_table_fixture() { + storage::libtsfile_init(); + std::string out_path = unique_temp_path("tsfile_cli_fixture", ".tsfile"); + std::string table_name = "table1"; + + storage::WriteFile file; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + file.create(out_path, flags, 0666); + + auto* schema = new storage::TableSchema( + table_name, + { + common::ColumnSchema("id1", common::STRING, common::UNCOMPRESSED, + common::PLAIN, common::ColumnCategory::TAG), + common::ColumnSchema("id2", common::STRING, common::UNCOMPRESSED, + common::PLAIN, common::ColumnCategory::TAG), + common::ColumnSchema("s1", common::INT64, common::UNCOMPRESSED, + common::PLAIN, common::ColumnCategory::FIELD), + }); + + auto* writer = new storage::TsFileTableWriter(&file, schema); + storage::Tablet tablet( + table_name, {"id1", "id2", "s1"}, + {common::STRING, common::STRING, common::INT64}, + {common::ColumnCategory::TAG, common::ColumnCategory::TAG, + common::ColumnCategory::FIELD}, + 10); + + for (int row = 0; row < 5; ++row) { + tablet.add_timestamp(row, static_cast(row)); + tablet.add_value(row, "id1", "id1_field_1"); + tablet.add_value(row, "id2", "id2_field_2"); + tablet.add_value(row, "s1", static_cast(row * 10)); + } + + writer->write_table(tablet); + writer->flush(); + writer->close(); + + delete writer; + delete schema; + return out_path; +} + +} // namespace tsfile_cli_test + +#endif // TSFILE_CLI_TEST_UTIL_H diff --git a/cpp/test/tools/command_e2e_test.cc b/cpp/test/tools/command_e2e_test.cc new file mode 100644 index 000000000..d06e909e0 --- /dev/null +++ b/cpp/test/tools/command_e2e_test.cc @@ -0,0 
+1,559 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include +#include +#include +#include + +#include "cli/run_cli.h" +#include "cli_test_util.h" + +namespace { + +struct Fixture { + std::string path = tsfile_cli_test::write_table_fixture(); + ~Fixture() { std::remove(path.c_str()); } +}; + +size_t count_lines(const std::string& s) { + size_t n = 0; + for (char c : s) { + if (c == '\n') { + ++n; + } + } + return n; +} + +} // namespace + +TEST(CliE2E, LsListsTableNameTsv) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"ls", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_EQ(out.str(), "name\ntable1\n"); + EXPECT_TRUE(err.str().empty()); +} + +TEST(CliE2E, LsNoHeaderJustName) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"ls", "-f", "tsv", "--no-header", f.path}, + out, err); + EXPECT_EQ(code, 0); + EXPECT_EQ(out.str(), "table1\n"); +} + +TEST(CliE2E, OpenMissingFileReturnsFileError) { + std::ostringstream out; + std::ostringstream err; + int code = + tsfile_cli::run_cli({"ls", "definitely_missing.tsfile"}, out, err); + EXPECT_EQ(code, 2); + 
EXPECT_FALSE(err.str().empty()); +} + +TEST(CliE2E, SchemaShowsFieldColumnAndType) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"schema", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_NE( + out.str().find("target\tmeasurement\tdatatype\tencoding\tcompression"), + std::string::npos); + EXPECT_NE(out.str().find("s1"), std::string::npos); + EXPECT_NE(out.str().find("INT64"), std::string::npos); +} + +TEST(CliE2E, SchemaTableMeasurementFilterOnlyShowsRequestedColumn) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"schema", "-m", "s1", "-f", "tsv", f.path}, + out, err); + EXPECT_EQ(code, 0); + EXPECT_NE(out.str().find("table1\ts1\tINT64"), std::string::npos); + EXPECT_EQ(out.str().find("table1\tid1"), std::string::npos); + EXPECT_EQ(out.str().find("table1\tid2"), std::string::npos); +} + +TEST(CliE2E, StatsReportsCountAndTimeRange) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"stats", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_NE(out.str().find("target\tmeasurement\tcount\tstart_time\tend_" + "time\tmin\tmax\tfirst\tlast\tsum"), + std::string::npos); + EXPECT_NE(out.str().find("s1\t5\t0\t4\t0\t40\t0\t40\t100"), + std::string::npos); +} + +TEST(CliE2E, HeadProjectsAndLimits) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"head", "-m", "s1", "-n", "2", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_EQ(out.str(), "time\ts1\n0\t0\n1\t10\n"); +} + +TEST(CliE2E, CatReturnsAllRows) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = + tsfile_cli::run_cli({"cat", "-m", "s1", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_EQ(count_lines(out.str()), 6u); + EXPECT_NE(out.str().find("time\ts1\n"), std::string::npos); +} + +TEST(CliE2E, CatWithTimeRange) { + 
Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"cat", "-m", "s1", "--start", "2", "--end", "3", "-f", "tsv", f.path}, + out, err); + EXPECT_EQ(code, 0); + EXPECT_EQ(out.str(), "time\ts1\n2\t20\n3\t30\n"); +} + +TEST(CliE2E, CatJsonIsNdjson) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"cat", "-m", "s1", "--start", "0", "--end", "0", "-f", "json", f.path}, + out, err); + EXPECT_EQ(code, 0); + EXPECT_EQ(out.str(), "{\"time\":0,\"s1\":0}\n"); +} + +TEST(CliE2E, MetaReportsFileSummary) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"meta", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_TRUE(err.str().empty()); + EXPECT_NE(out.str().find("file\tmodel\tdevice_count\ttable_count\tseries_" + "count\tstart_time\tend_time\tfile_size_bytes"), + std::string::npos); + EXPECT_NE(out.str().find("\ttable\t"), std::string::npos); +} + +TEST(CliE2E, CountReportsSeriesCountsAndTotal) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"count", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_TRUE(err.str().empty()); + EXPECT_NE(out.str().find("target\tmeasurement\tcount"), std::string::npos); + EXPECT_NE(out.str().find("\ts1\t5"), std::string::npos); + EXPECT_NE(out.str().find("total\t\t"), std::string::npos); +} + +TEST(CliE2E, SampleIsReproducibleWithSeed) { + Fixture f; + std::ostringstream out1; + std::ostringstream err1; + std::ostringstream out2; + std::ostringstream err2; + + int code1 = tsfile_cli::run_cli( + {"sample", "-m", "s1", "-n", "3", "--seed", "7", "-f", "tsv", f.path}, + out1, err1); + int code2 = tsfile_cli::run_cli( + {"sample", "-m", "s1", "-n", "3", "--seed", "7", "-f", "tsv", f.path}, + out2, err2); + + EXPECT_EQ(code1, 0); + EXPECT_EQ(code2, 0); + EXPECT_TRUE(err1.str().empty()); + EXPECT_TRUE(err2.str().empty()); + 
EXPECT_EQ(out1.str(), out2.str()); + EXPECT_EQ(count_lines(out1.str()), 4u); + EXPECT_NE(out1.str().find("time\ts1\n"), std::string::npos); +} + +TEST(CliE2E, WriteThenReadRoundTrip) { + std::string csv_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_write_in", ".csv"); + { + std::ofstream o(csv_path.c_str()); + o << "time,id1,s1\n0,dev,0\n1,dev,10\n2,dev,20\n"; + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_write_out", ".tsfile"); + + std::ostringstream wout; + std::ostringstream werr; + int wc = tsfile_cli::run_cli( + {"write", "--table", "t1", "--columns", "id1:STRING:tag,s1:INT64:field", + "-o", out_path, csv_path}, + wout, werr); + EXPECT_EQ(wc, 0) << werr.str(); + + std::ostringstream cout_; + std::ostringstream cerr_; + int cc = + tsfile_cli::run_cli({"count", "-f", "tsv", out_path}, cout_, cerr_); + EXPECT_EQ(cc, 0); + EXPECT_NE(cout_.str().find("\ts1\t3"), std::string::npos) << cout_.str(); + + std::ostringstream rout; + std::ostringstream rerr; + int rc = tsfile_cli::run_cli({"cat", "-m", "s1", "-f", "tsv", out_path}, + rout, rerr); + EXPECT_EQ(rc, 0); + EXPECT_EQ(rout.str(), "time\ts1\n0\t0\n1\t10\n2\t20\n"); + + std::remove(csv_path.c_str()); + std::remove(out_path.c_str()); +} + +TEST(CliE2E, WriteMissingColumnsIsUsageError) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"write", "--table", "t1", "-o", "x.tsfile", "in.csv"}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("--columns"), std::string::npos); +} + +namespace { +bool path_exists(const std::string& p) { + std::ifstream in(p.c_str()); + return in.good(); +} +} // namespace + +TEST(CliE2E, WriteRejectsOutOfOrderTimestampsAndLeavesNoOutput) { + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_ooo", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,s1\n5,50\n1,10\n"; + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_ooo_out", ".tsfile"); + + 
std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"write", "--table", "t", "--columns", + "s1:INT64:field", "-o", out_path, csv}, + out, err); + EXPECT_EQ(code, 3); + EXPECT_NE(err.str().find("strictly increasing"), std::string::npos) + << err.str(); + EXPECT_NE(err.str().find("line 3"), std::string::npos) << err.str(); + EXPECT_FALSE(path_exists(out_path)) << "failed import must leave no output"; + + std::remove(csv.c_str()); + std::remove(out_path.c_str()); +} + +TEST(CliE2E, WriteAllowsSameTimestampAcrossDevices) { + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_md", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,id,s1\n1,A,10\n1,B,20\n2,A,30\n"; + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_md_out", ".tsfile"); + + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"write", "--table", "t", "--columns", "id:STRING:tag,s1:INT64:field", + "-o", out_path, csv}, + out, err); + EXPECT_EQ(code, 0) << err.str(); + + std::ostringstream cout_; + std::ostringstream cerr_; + tsfile_cli::run_cli({"count", "-f", "tsv", out_path}, cout_, cerr_); + EXPECT_NE(cout_.str().find("total\t\t3"), std::string::npos) << cout_.str(); + + std::remove(csv.c_str()); + std::remove(out_path.c_str()); +} + +TEST(CliE2E, WriteRejectsOutputEqualsInput) { + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_alias", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,s1\n0,1\n"; + } + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"write", "--table", "t", "--columns", + "s1:INT64:field", "-o", csv, csv}, + out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("same as the input"), std::string::npos) + << err.str(); + // The input file must be untouched. 
+ std::ifstream in(csv.c_str()); + std::stringstream buf; + buf << in.rdbuf(); + EXPECT_EQ(buf.str(), "time,s1\n0,1\n"); + + std::remove(csv.c_str()); +} + +TEST(CliE2E, WriteFailureOnBadValueLeavesNoOutput) { + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_badval", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,s1\n0,notanumber\n"; + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_badval_out", ".tsfile"); + + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"write", "--table", "t", "--columns", + "s1:INT64:field", "-o", out_path, csv}, + out, err); + EXPECT_EQ(code, 3); + EXPECT_FALSE(path_exists(out_path)); + + std::remove(csv.c_str()); + std::remove(out_path.c_str()); +} + +TEST(CliE2E, WriteRejectsDuplicateColumnNames) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"write", "--table", "t", "--columns", "s1:INT64:field,s1:INT64:field", + "-o", "x.tsfile", "-"}, + out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("duplicate column"), std::string::npos) + << err.str(); +} + +TEST(CliE2E, WriteRejectsHeaderMatchWithNoHeader) { + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli( + {"write", "--table", "t", "--columns", "s1:INT64:field", "-o", + "x.tsfile", "--no-header", "--header-match", "-"}, + out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("--header-match"), std::string::npos) << err.str(); +} + +TEST(CliE2E, ReadRejectsWriteOnlyFlag) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"ls", "-o", "x.tsfile", f.path}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("only valid for write"), std::string::npos) + << err.str(); +} + +TEST(CliE2E, MetaRejectsDeviceScopeFlag) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"meta", "-d", "dev", f.path}, out, err); + 
EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("not valid for meta"), std::string::npos) + << err.str(); +} + +TEST(CliE2E, SchemaTableShowsEncodingAndCompression) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"schema", "-f", "tsv", f.path}, out, err); + EXPECT_EQ(code, 0); + // Table-model schema must report real (non-empty) encoding and compression + // rather than blanks. The INT64 field encodes as TS_2DIFF; the compression + // is the engine default (build-dependent) but must not be empty. + EXPECT_NE(out.str().find("\ts1\tINT64\tTS_2DIFF\t"), std::string::npos) + << out.str(); + EXPECT_EQ(out.str().find("\ts1\tINT64\tTS_2DIFF\t\n"), std::string::npos) + << out.str(); +} + +namespace { +// Run a one-row `write` whose single value cell is `value`, declaring the +// column as `type`. Returns the exit code; captures stderr into `err`. +int write_one_value(const std::string& type, const std::string& value, + std::string& err_out) { + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_ovf", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,s1\n0," << value << "\n"; + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_ovf_out", ".tsfile"); + std::ostringstream out; + std::ostringstream err; + int code = + tsfile_cli::run_cli({"write", "--table", "t", "--columns", + "s1:" + type + ":field", "-o", out_path, csv}, + out, err); + err_out = err.str(); + std::remove(csv.c_str()); + std::remove(out_path.c_str()); + return code; +} +} // namespace + +TEST(CliE2E, WriteRejectsInt32Overflow) { + std::string err; + EXPECT_EQ(write_one_value("INT32", "3000000000", err), 3); + EXPECT_NE(err.find("INT32 out of range"), std::string::npos) << err; +} + +TEST(CliE2E, WriteAcceptsInt32Boundary) { + std::string err; + EXPECT_EQ(write_one_value("INT32", "2147483647", err), 0) << err; +} + +TEST(CliE2E, WriteRejectsInt64Overflow) { + std::string err; + EXPECT_EQ(write_one_value("INT64", 
"99999999999999999999999999", err), 3); + EXPECT_NE(err.find("INT64 out of range"), std::string::npos) << err; +} + +TEST(CliE2E, WriteRejectsDoubleOverflow) { + std::string err; + EXPECT_EQ(write_one_value("DOUBLE", "1e400", err), 3); + EXPECT_NE(err.find("DOUBLE out of range"), std::string::npos) << err; +} + +TEST(CliE2E, WriteRejectsNonNumericInt64) { + std::string err; + EXPECT_EQ(write_one_value("INT64", "12abc", err), 3); + EXPECT_NE(err.find("bad INT64"), std::string::npos) << err; +} + +TEST(CliE2E, WriteRejectsOutOfOrderAcrossBatches) { + // More than one 1024-row batch of ascending rows, then a violating + // timestamp. The first batch is already flushed by the time the bad row is + // read, so this proves both that per-device tracking survives a batch flush + // and that the already-written output is removed on failure. + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_xbatch", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,s1\n"; + for (int i = 1; i <= 1100; ++i) { + o << i << "," << i << "\n"; + } + o << "500,999\n"; // <= the last timestamp for the tag-less device + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_xbatch_out", ".tsfile"); + + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"write", "--table", "t", "--columns", + "s1:INT64:field", "-o", out_path, csv}, + out, err); + EXPECT_EQ(code, 3); + EXPECT_NE(err.str().find("strictly increasing"), std::string::npos) + << err.str(); + EXPECT_FALSE(path_exists(out_path)); + + std::remove(csv.c_str()); + std::remove(out_path.c_str()); +} + +TEST(CliE2E, WriteStreamsLargeInputRoundTrips) { + std::string csv = + tsfile_cli_test::unique_temp_path("tsfile_cli_large", ".csv"); + { + std::ofstream o(csv.c_str()); + o << "time,s1\n"; + for (int i = 1; i <= 3000; ++i) { + o << i << "," << (i * 2) << "\n"; + } + } + std::string out_path = + tsfile_cli_test::unique_temp_path("tsfile_cli_large_out", ".tsfile"); + + 
std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"write", "--table", "big", "--columns", + "s1:INT64:field", "-o", out_path, csv}, + out, err); + EXPECT_EQ(code, 0) << err.str(); + + std::ostringstream cout_; + std::ostringstream cerr_; + tsfile_cli::run_cli({"count", "-f", "tsv", out_path}, cout_, cerr_); + EXPECT_NE(cout_.str().find("\ts1\t3000"), std::string::npos) << cout_.str(); + + std::remove(csv.c_str()); + std::remove(out_path.c_str()); +} + +TEST(CliE2E, HelpWithPositionalFilePrintsUsage) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"cat", "--help", f.path}, out, err); + EXPECT_EQ(code, 0); + EXPECT_NE(out.str().find("Usage:"), std::string::npos) << out.str(); +} + +TEST(CliE2E, StatsRejectsRowOnlyFlag) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"stats", "--start", "1", f.path}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("only valid for head/cat/sample"), + std::string::npos) + << err.str(); +} + +TEST(CliE2E, LsRejectsMeasurementsFlag) { + Fixture f; + std::ostringstream out; + std::ostringstream err; + int code = tsfile_cli::run_cli({"ls", "-m", "s1", f.path}, out, err); + EXPECT_EQ(code, 1); + EXPECT_NE(err.str().find("not valid for ls"), std::string::npos) + << err.str(); +} diff --git a/cpp/test/tools/input_format_test.cc b/cpp/test/tools/input_format_test.cc new file mode 100644 index 000000000..08c4b5f91 --- /dev/null +++ b/cpp/test/tools/input_format_test.cc @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "format/input_format.h" + +#include + +#include "common/db_common.h" +#include "utils/db_utils.h" + +TEST(InputFormatTest, ParseColumnsSpecValid) { + std::vector cols; + std::string err; + EXPECT_TRUE(tsfile_cli::parse_columns_spec("id1:STRING:tag,s1:INT64:field", + cols, err)); + ASSERT_EQ(cols.size(), 2u); + EXPECT_EQ(cols[0].name, "id1"); + EXPECT_EQ(cols[0].type, common::STRING); + EXPECT_EQ(cols[0].category, common::ColumnCategory::TAG); + EXPECT_EQ(cols[1].type, common::INT64); + EXPECT_EQ(cols[1].category, common::ColumnCategory::FIELD); +} + +TEST(InputFormatTest, ParseColumnsSpecCaseInsensitiveType) { + std::vector cols; + std::string err; + EXPECT_TRUE(tsfile_cli::parse_columns_spec("s1:int64:field", cols, err)); + EXPECT_EQ(cols[0].type, common::INT64); +} + +TEST(InputFormatTest, ParseColumnsSpecErrors) { + std::vector cols; + std::string err; + EXPECT_FALSE(tsfile_cli::parse_columns_spec("s1:NOPE:field", cols, err)); + EXPECT_FALSE(tsfile_cli::parse_columns_spec("s1:INT64:bogus", cols, err)); + EXPECT_FALSE(tsfile_cli::parse_columns_spec("s1:INT64", cols, err)); + EXPECT_FALSE(tsfile_cli::parse_columns_spec("", cols, err)); + EXPECT_FALSE(tsfile_cli::parse_columns_spec(":INT64:field", cols, err)); +} + +TEST(InputFormatTest, ParseColumnsSpecRejectsDuplicateNames) { + std::vector cols; + std::string err; + EXPECT_FALSE(tsfile_cli::parse_columns_spec("s1:INT64:field,s1:INT32:field", + cols, err)); + EXPECT_NE(err.find("duplicate column"), std::string::npos) << err; +} + +TEST(InputFormatTest, SplitLineTsv) { + 
std::vector f = + tsfile_cli::split_line("0\t10\t20", '\t', false); + ASSERT_EQ(f.size(), 3u); + EXPECT_EQ(f[0], "0"); + EXPECT_EQ(f[2], "20"); +} + +TEST(InputFormatTest, SplitLineCsvQuotes) { + std::vector f = + tsfile_cli::split_line("1,\"a,b\",\"she \"\"hi\"\"\"", ',', true); + ASSERT_EQ(f.size(), 3u); + EXPECT_EQ(f[1], "a,b"); + EXPECT_EQ(f[2], "she \"hi\""); +} + +TEST(InputFormatTest, SplitLineEmptyFields) { + std::vector f = tsfile_cli::split_line("0,,5", ',', true); + ASSERT_EQ(f.size(), 3u); + EXPECT_EQ(f[1], ""); +} + +TEST(InputFormatTest, ParseBoolCell) { + bool b = false; + EXPECT_TRUE(tsfile_cli::parse_bool_cell("true", b)); + EXPECT_TRUE(b); + EXPECT_TRUE(tsfile_cli::parse_bool_cell("0", b)); + EXPECT_FALSE(b); + EXPECT_FALSE(tsfile_cli::parse_bool_cell("maybe", b)); +} diff --git a/cpp/test/tools/output_format_test.cc b/cpp/test/tools/output_format_test.cc new file mode 100644 index 000000000..abaa47dc9 --- /dev/null +++ b/cpp/test/tools/output_format_test.cc @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "format/output_format.h" + +#include + +#include +#include + +#include "common/db_common.h" +#include "utils/errno_define.h" + +using tsfile_cli::OutputFormat; +using tsfile_cli::ParsedArgs; +using tsfile_cli::RowWriter; + +TEST(ErrorCodeMessageTest, KnownCodesMapToReadablePhrases) { + EXPECT_STREQ(tsfile_cli::error_code_message(common::E_TABLE_NOT_EXIST), + "table does not exist"); + EXPECT_STREQ(tsfile_cli::error_code_message(common::E_DEVICE_NOT_EXIST), + "device does not exist"); + EXPECT_STREQ( + tsfile_cli::error_code_message(common::E_MEASUREMENT_NOT_EXIST), + "measurement does not exist"); + EXPECT_STREQ(tsfile_cli::error_code_message(common::E_TSFILE_CORRUPTED), + "file is corrupted"); + EXPECT_STREQ(tsfile_cli::error_code_message(common::E_OUT_OF_ORDER), + "data is out of order"); + EXPECT_STREQ(tsfile_cli::error_code_message(common::E_DECODE_ERR), + "failed to decode data"); +} + +TEST(ErrorCodeMessageTest, UnknownCodeFallsBackToInternalError) { + EXPECT_STREQ(tsfile_cli::error_code_message(987654), "internal error"); + // The phrase is always a non-empty, printable string (never a bare code). 
+ EXPECT_GT(std::string(tsfile_cli::error_code_message(-1)).size(), 0u); +} + +TEST(ResolveFormatTest, AutoUsesTableOnTtyTsvOtherwise) { + EXPECT_EQ(tsfile_cli::resolve_format(ParsedArgs::Format::kAuto, true), + OutputFormat::kTable); + EXPECT_EQ(tsfile_cli::resolve_format(ParsedArgs::Format::kAuto, false), + OutputFormat::kTsv); + EXPECT_EQ(tsfile_cli::resolve_format(ParsedArgs::Format::kJson, true), + OutputFormat::kJson); +} + +TEST(CsvEscapeTest, QuotesWhenSpecialCharsPresent) { + EXPECT_EQ(tsfile_cli::csv_escape("plain"), "plain"); + EXPECT_EQ(tsfile_cli::csv_escape("a,b"), "\"a,b\""); + EXPECT_EQ(tsfile_cli::csv_escape("she said \"hi\""), + "\"she said \"\"hi\"\"\""); + EXPECT_EQ(tsfile_cli::csv_escape("line\nbreak"), "\"line\nbreak\""); +} + +TEST(JsonEscapeTest, EscapesQuotesBackslashAndControls) { + EXPECT_EQ(tsfile_cli::json_escape("a\"b\\c"), "a\\\"b\\\\c"); + EXPECT_EQ(tsfile_cli::json_escape("tab\there"), "tab\\there"); +} + +TEST(TypeNameTest, KnownTypesMapToNames) { + EXPECT_STREQ(tsfile_cli::tsdatatype_name(common::INT64), "INT64"); + EXPECT_STREQ(tsfile_cli::tsdatatype_name(common::STRING), "STRING"); + EXPECT_STREQ(tsfile_cli::tsdatatype_name(common::BOOLEAN), "BOOLEAN"); +} + +TEST(EncodingNameTest, KnownEncodings) { + EXPECT_STREQ(tsfile_cli::tsencoding_name(common::PLAIN), "PLAIN"); + EXPECT_STREQ(tsfile_cli::tsencoding_name(common::TS_2DIFF), "TS_2DIFF"); + EXPECT_STREQ(tsfile_cli::tsencoding_name(common::SPRINTZ), "SPRINTZ"); +} + +TEST(CompressionNameTest, KnownCompressors) { + EXPECT_STREQ(tsfile_cli::compression_name(common::UNCOMPRESSED), + "UNCOMPRESSED"); + EXPECT_STREQ(tsfile_cli::compression_name(common::SNAPPY), "SNAPPY"); + EXPECT_STREQ(tsfile_cli::compression_name(common::LZ4), "LZ4"); +} + +TEST(RowWriterTest, TsvWritesHeaderThenRows) { + std::ostringstream out; + RowWriter w(out, OutputFormat::kTsv, {"time", "s1"}, + {common::INT64, common::INT64}, false); + w.write({"1", "10"}, {false, false}); + w.write({"2", ""}, {false, 
true}); + w.finish(); + EXPECT_EQ(out.str(), "time\ts1\n1\t10\n2\t\n"); +} + +TEST(RowWriterTest, NoHeaderSuppressesHeader) { + std::ostringstream out; + RowWriter w(out, OutputFormat::kTsv, {"name"}, {common::STRING}, true); + w.write({"table1"}, {false}); + w.finish(); + EXPECT_EQ(out.str(), "table1\n"); +} + +TEST(RowWriterTest, CsvEscapesCells) { + std::ostringstream out; + RowWriter w(out, OutputFormat::kCsv, {"name"}, {common::STRING}, false); + w.write({"a,b"}, {false}); + w.finish(); + EXPECT_EQ(out.str(), "name\n\"a,b\"\n"); +} + +TEST(RowWriterTest, JsonNumbersUnquotedStringsQuotedNullEmitted) { + std::ostringstream out; + RowWriter w(out, OutputFormat::kJson, {"time", "name"}, + {common::INT64, common::STRING}, false); + w.write({"5", "dev1"}, {false, false}); + w.write({"6", ""}, {false, true}); + w.finish(); + EXPECT_EQ(out.str(), + "{\"time\":5,\"name\":\"dev1\"}\n" + "{\"time\":6,\"name\":null}\n"); +} + +TEST(RowWriterTest, TableAlignsColumns) { + std::ostringstream out; + RowWriter w(out, OutputFormat::kTable, {"name", "type"}, + {common::STRING, common::STRING}, false); + w.write({"s1", "INT64"}, {false, false}); + w.write({"longname", "BOOLEAN"}, {false, false}); + w.finish(); + EXPECT_EQ(out.str(), + "name type\n" + "s1 INT64\n" + "longname BOOLEAN\n"); +} diff --git a/cpp/test/tools/statistics_test.cc b/cpp/test/tools/statistics_test.cc new file mode 100644 index 000000000..a151fdc3c --- /dev/null +++ b/cpp/test/tools/statistics_test.cc @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "commands/statistics.h" + +#include + +#include "common/statistic.h" + +TEST(StatisticsTest, Int64StatisticCellsContainValueSummaries) { + storage::Int64Statistic st; + st.update(1, static_cast(10)); + st.update(3, static_cast(30)); + tsfile_cli::StatisticCells cells = tsfile_cli::statistic_value_cells(&st); + EXPECT_EQ(cells.values[0], "10"); + EXPECT_EQ(cells.values[1], "30"); + EXPECT_EQ(cells.values[2], "10"); + EXPECT_EQ(cells.values[3], "30"); + EXPECT_EQ(cells.values[4], "40"); + EXPECT_EQ(cells.is_null, + std::vector({false, false, false, false, false})); +} + +TEST(StatisticsTest, BooleanStatisticLeavesMinMaxNull) { + storage::BooleanStatistic st; + st.update(1, true); + st.update(2, false); + tsfile_cli::StatisticCells cells = tsfile_cli::statistic_value_cells(&st); + EXPECT_TRUE(cells.is_null[0]); + EXPECT_TRUE(cells.is_null[1]); + EXPECT_EQ(cells.values[2], "true"); + EXPECT_EQ(cells.values[3], "false"); + EXPECT_EQ(cells.values[4], "1"); +} diff --git a/cpp/tools/CMakeLists.txt b/cpp/tools/CMakeLists.txt new file mode 100644 index 000000000..d0e5d9633 --- /dev/null +++ b/cpp/tools/CMakeLists.txt @@ -0,0 +1,45 @@ +#[[ +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +]] + +file(GLOB_RECURSE TSFILE_CLI_SRCS + "cli/*.cc" + "format/*.cc" + "commands/*.cc") + +#[[ All CLI sources except main() are compiled once into an OBJECT library. ]] +add_library(tsfile_cli_obj OBJECT ${TSFILE_CLI_SRCS}) +target_include_directories(tsfile_cli_obj PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/src) + +if (ENABLE_ANTLR4) + target_include_directories(tsfile_cli_obj PUBLIC + ${PROJECT_SOURCE_DIR}/third_party/antlr4-cpp-runtime-4/runtime/src) +endif () + +target_compile_definitions(tsfile_cli_obj PRIVATE + TSFILE_CLI_VERSION="${TsFile_CPP_VERSION}") + +#[[ The executable is tools_main.cc plus the object files of tsfile_cli_obj. ]] +add_executable(tsfile_cli tools_main.cc $<TARGET_OBJECTS:tsfile_cli_obj>) +target_include_directories(tsfile_cli PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(tsfile_cli tsfile) +set_target_properties(tsfile_cli PROPERTIES + OUTPUT_NAME tsfile-cli + RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) + +install(TARGETS tsfile_cli RUNTIME DESTINATION bin) diff --git a/cpp/tools/README.md b/cpp/tools/README.md new file mode 100644 index 000000000..c5721f46c --- /dev/null +++ b/cpp/tools/README.md @@ -0,0 +1,215 @@ + + +# tsfile-cli — TsFile Command-Line Tool + +`tsfile-cli` is a single, pipe-friendly C++ command-line tool for inspecting **and** +importing Apache TsFile (`.tsfile`) files from the shell — the TsFile analogue of +`parquet-cli` / `pqrs`. Read commands print data to **stdout** and diagnostics to +**stderr**, so they compose with `awk`, `jq`, `sort`, and friends; the `write` command +imports CSV/TSV into a new `.tsfile`. It is built on the public `storage::TsFileReader` +and `storage::TsFileTableWriter` APIs and does not modify the storage engine.
+ +## Building from source + +The CLI is part of the C++ module and is built by default (CMake option `BUILD_TOOLS=ON`). +The CMake target is `tsfile_cli`; the produced executable is named `tsfile-cli`. + +**Prerequisites:** a C++11 compiler (GCC / Clang / MSVC) and CMake ≥ 3.11. The third-party +dependencies (ANTLR4, Snappy, LZ4, LZOKAY, Zlib, GoogleTest) are bundled under +`cpp/third_party/` and built automatically — no separate install step needed. + +Choose any one of the following. + +**1. Build script (recommended).** From `cpp/`: + +```bash +bash build.sh -t=Debug # -> cpp/build/Debug/bin/tsfile-cli +bash build.sh # Release (default) -> cpp/build/Release/bin/tsfile-cli +``` + +**2. Maven (builds the whole C++ module).** From the repository root: + +```bash +./mvnw clean package -P with-cpp # -> cpp/target/build/bin/tsfile-cli +``` + +**3. Plain CMake.** From `cpp/`: + +```bash +mkdir -p build/Debug && cd build/Debug +cmake ../.. -DCMAKE_BUILD_TYPE=Debug +make -j tsfile_cli # -> build/Debug/bin/tsfile-cli +``` + +> **CMake 4.x note.** The bundled ANTLR4 runtime sets old CMake policies that CMake 4 +> rejects (`Policy CMP00xx may not be set to OLD behavior`). The reader and CLI do not use +> ANTLR4, so disable it — `--disable-antlr4` for the build script, or `-DENABLE_ANTLR4=OFF` +> for plain CMake: +> +> ```bash +> bash build.sh -t=Debug --disable-antlr4 +> ``` + +Verify the binary: + +```bash +./build/Debug/bin/tsfile-cli --version # -> tsfile-cli (Apache TsFile C++) <version> +./build/Debug/bin/tsfile-cli --help +``` + +The executable links the `tsfile` shared library built alongside it. To run it from +anywhere, either run it in place by its full path, or use CMake's install step +(`cmake --install .` / `make install`), which installs the binary to `<prefix>/bin` and +`libtsfile` to `<prefix>/lib`. + +## Usage + +``` +tsfile-cli <command> [options] <file> +tsfile-cli --help | --version | help +``` + +Exit codes: `0` success, `1` usage/argument error, `2` file open/corrupt, +`3` query/runtime error.
+ +### Reading + +| Command | Description | +|---|---| +| `ls` | List devices (tree model) or tables (table model), one name per line | +| `schema` | Per-series `target, measurement, datatype, encoding, compression` | +| `meta` | File summary: model, device/table/series counts, time range, file size | +| `stats` | Per-series `count, start_time, end_time, min, max, first, last, sum` | +| `count` | Per-series row counts plus a `total` row (from statistics, no page scan) | +| `head` | First N rows (default 10; use `-n`) | +| `cat` | All matching rows, streamed | +| `sample` | Reproducible reservoir sample (default 10; `-n`, `--seed`) | + +The metadata commands (`ls` / `schema` / `meta` / `stats` / `count`) answer most questions +without decoding data pages. + +Shared options: + +| Option | Meaning | +|---|---| +| `-f, --format csv\|tsv\|json\|table` | Output format; defaults to `table` on a TTY, `tsv` when piped | +| `-d, --device <name>` / `-t, --table <name>` | Scope to one device / table (mutually exclusive) | +| `-m, --measurements a,b,c` | Column projection (`schema`, `head`, `cat`, `sample`) | +| `-n, --limit N` / `--offset N` | Max rows / rows to skip (`head`, `cat`; `--offset` not valid for `sample`) | +| `--start <ms>` / `--end <ms>` | Inclusive epoch-millisecond time range (`head`, `cat`, `sample`) | +| `--seed N` | Reproducible sampling seed (`sample` only) | +| `--no-header` | Omit the header row | +| `--model tree\|table` | Force the model (otherwise auto-detected) | + +`json` output is NDJSON (one object per line; numbers/booleans bare, other values quoted, +nulls as `null`). CSV output follows RFC 4180. Timestamps are raw epoch milliseconds.
+ +```bash +BIN=cpp/build/Debug/bin/tsfile-cli +$BIN ls -f tsv data.tsfile # list tables / devices +$BIN meta data.tsfile # quick file overview +$BIN count -t table1 -f tsv data.tsfile # row counts, no page scan +$BIN cat -m temp,humidity --start 1700000000000 -f csv data.tsfile | head +$BIN sample -m temp -n 20 --seed 42 -f json data.tsfile | jq . +``` + +### Writing (import) + +`tsfile-cli write` imports CSV/TSV rows into a **new table-model** `.tsfile` (the output is +overwritten). The first input column is the timestamp (epoch milliseconds); the remaining +columns are declared explicitly with `--columns` — there is no type inference. + +Timestamps must be **strictly increasing per device**, where a device is identified by its +`tag` column values (rows that share the same tags form one device's timeline). Rows for +different tag combinations may freely interleave and reuse timestamps. Out-of-order input is +rejected with the offending line number, and a failed import leaves no output file behind. +`--output` must differ from the input file. + +``` +tsfile-cli write --table <name> --columns <spec> -o <out.tsfile> \ + [-f csv|tsv] [--no-header] [--header-match] [-v] [<input> | -] +``` + +`--columns` is a comma-separated list of `name:TYPE:category`, where `category` is `tag` or +`field` and `TYPE` (case-insensitive) is one of `BOOLEAN, INT32, INT64, FLOAT, DOUBLE, +STRING, TEXT` — for example `--columns "id1:STRING:tag,s1:INT64:field"`.
+ +| Option | Meaning | +|---|---| +| `--table <name>` | Output table name (lower-cased) | +| `--columns <spec>` | Ordered data columns (excludes the leading timestamp column) | +| `-o, --output <path>` | Output `.tsfile` (required; overwritten) | +| `<input>` / `-` | Input file, or `-` / omitted for stdin | +| `-f csv\|tsv` | Input delimiter (default csv; `json` / `table` are rejected) | +| `--no-header` | Input has no header row (default: first line is a header and is skipped) | +| `--header-match` | Validate header names against `--columns` | +| `-v, --verbose` | Print `wrote N rows to <path>` to stderr (otherwise silent on success) | + +An empty cell is written as null. The command is silent on success (Unix-style); pass `-v` +for a one-line summary. + +```bash +# round-trip through a pipe +printf 'time,id1,s1\n0,dev,0\n1,dev,10\n' \ + | tsfile-cli write --table t1 --columns "id1:STRING:tag,s1:INT64:field" -o out.tsfile - +tsfile-cli count -f tsv out.tsfile # -> t1.dev s1 2 +``` + +For tree-model writes, JSON input, or programmatic use, use the C++ SDK directly — see +`cpp/examples/cpp_examples/demo_write.cpp` (`TsFileTableWriter` / `TsFileWriter` + `Tablet`). + +## Using the skill with an AI assistant + +`cpp/tools/skills/tsfile-cli/SKILL.md` is a machine-readable reference that teaches AI +coding assistants (e.g. Claude Code) how to drive `tsfile-cli` correctly. Such assistants +auto-discover skills from a `.claude/skills/` directory at session start, so "installing" +the skill just means placing it there — either project-level or user-level: + +```bash +# project-level (this repository only) +mkdir -p .claude/skills/tsfile-cli +cp cpp/tools/skills/tsfile-cli/SKILL.md .claude/skills/tsfile-cli/SKILL.md + +# or user-level (available in all your projects) +mkdir -p ~/.claude/skills/tsfile-cli +cp cpp/tools/skills/tsfile-cli/SKILL.md ~/.claude/skills/tsfile-cli/SKILL.md +``` + +> The installed `SKILL.md` must begin with its YAML front-matter (`--- … ---`) for the +> assistant to detect it.
The in-repo copy carries an Apache license header comment above +> the front-matter; if discovery fails, delete that leading `<!-- … -->` block from the +> installed copy so `---` is the first line. + +Start a new assistant session afterward. The skill then activates automatically when you +ask to inspect or import a `.tsfile`; you can also invoke it explicitly (e.g. "use the +tsfile-cli skill"). + +## Source layout + +```text +cpp/tools/ +├── tools_main.cc # main(): forwards argv to run_cli +├── cli/ # argument parsing, top-level dispatch, exit codes +├── format/ # csv/tsv/json/table output + CSV/TSV input parsing +├── commands/ # one file per command + shared row-query / statistics helpers +└── skills/tsfile-cli/ # model-facing skill reference (for AI assistants) +``` diff --git a/cpp/tools/cli/cli_args.cc b/cpp/tools/cli/cli_args.cc new file mode 100644 index 000000000..61c193d88 --- /dev/null +++ b/cpp/tools/cli/cli_args.cc @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +#include "cli/cli_args.h" + +#include +#include + +namespace tsfile_cli { +namespace { + +std::vector split_csv(const std::string& s) { + std::vector out; + std::string item; + std::istringstream iss(s); + while (std::getline(iss, item, ',')) { + if (!item.empty()) { + out.push_back(item); + } + } + return out; +} + +bool parse_ll(const std::string& s, long long& out) { + if (s.empty()) { + return false; + } + char* endp = nullptr; + long long v = std::strtoll(s.c_str(), &endp, 10); + if (endp == nullptr || *endp != '\0') { + return false; + } + out = v; + return true; +} + +bool parse_format(const std::string& s, ParsedArgs::Format& out) { + if (s == "csv") { + out = ParsedArgs::Format::kCsv; + } else if (s == "tsv") { + out = ParsedArgs::Format::kTsv; + } else if (s == "json") { + out = ParsedArgs::Format::kJson; + } else if (s == "table") { + out = ParsedArgs::Format::kTable; + } else { + return false; + } + return true; +} + +} // namespace + +ParsedArgs parse_args(const std::vector& args) { + ParsedArgs p; + if (args.empty()) { + return p; + } + p.command = args[0]; + if (p.command == "--version") { + p.version = true; + } + if (p.command == "--help" || p.command == "-h") { + p.help = true; + } + // The subcommand must come first. A leading option means it was omitted; + // say so explicitly instead of failing later with a confusing message about + // the first real positional argument. 
+ if (p.command.size() > 1 && p.command[0] == '-' && !p.version && !p.help) { + p.error = "the command must come before options (got option '" + + p.command + "'); run with --help for usage"; + return p; + } + + size_t i = 1; + auto need_value = [&](const std::string& flag, std::string& dst) -> bool { + if (i + 1 >= args.size()) { + p.error = "Missing value for " + flag; + return false; + } + dst = args[++i]; + return true; + }; + + for (; i < args.size(); ++i) { + const std::string& a = args[i]; + std::string val; + if (a == "-f" || a == "--format") { + if (!need_value(a, val)) { + return p; + } + if (!parse_format(val, p.format)) { + p.error = + "Invalid format: " + val + " (use csv|tsv|json|table)"; + return p; + } + } else if (a == "-d" || a == "--device") { + if (!need_value(a, p.device)) { + return p; + } + } else if (a == "-t" || a == "--table") { + if (!need_value(a, p.table)) { + return p; + } + } else if (a == "-m" || a == "--measurements") { + if (!need_value(a, val)) { + return p; + } + p.measurements = split_csv(val); + } else if (a == "-n" || a == "--limit") { + if (!need_value(a, val)) { + return p; + } + if (!parse_ll(val, p.limit)) { + p.error = "Invalid --limit: " + val; + return p; + } + } else if (a == "--offset") { + if (!need_value(a, val)) { + return p; + } + if (!parse_ll(val, p.offset)) { + p.error = "Invalid --offset: " + val; + return p; + } + } else if (a == "--start") { + if (!need_value(a, val)) { + return p; + } + if (!parse_ll(val, p.start)) { + p.error = "Invalid --start: " + val; + return p; + } + p.has_start = true; + } else if (a == "--end") { + if (!need_value(a, val)) { + return p; + } + if (!parse_ll(val, p.end)) { + p.error = "Invalid --end: " + val; + return p; + } + p.has_end = true; + } else if (a == "--seed") { + if (!need_value(a, val)) { + return p; + } + if (!parse_ll(val, p.seed)) { + p.error = "Invalid --seed: " + val; + return p; + } + p.has_seed = true; + } else if (a == "-o" || a == "--output") { + if 
(!need_value(a, p.output)) { + return p; + } + } else if (a == "--columns") { + if (!need_value(a, p.columns)) { + return p; + } + } else if (a == "-v" || a == "--verbose") { + p.verbose = true; + } else if (a == "--header-match") { + p.header_match = true; + } else if (a == "--model") { + if (!need_value(a, val)) { + return p; + } + if (val != "tree" && val != "table") { + p.error = "Invalid --model: " + val + " (use tree|table)"; + return p; + } + p.model = val; + } else if (a == "--no-header") { + p.no_header = true; + } else if (a == "-h" || a == "--help") { + p.help = true; + } else if (a == "--version") { + p.version = true; + } else if (a.size() > 1 && a[0] == '-') { + p.error = "Unknown flag: " + a; + return p; + } else { + if (p.file.empty()) { + p.file = a; + } else { + p.error = "Unexpected argument: " + a; + return p; + } + } + } + return p; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/cli/cli_args.h b/cpp/tools/cli/cli_args.h new file mode 100644 index 000000000..276bcc0e2 --- /dev/null +++ b/cpp/tools/cli/cli_args.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef TSFILE_CLI_CLI_ARGS_H +#define TSFILE_CLI_CLI_ARGS_H + +#include +#include +#include + +namespace tsfile_cli { + +struct ParsedArgs { + enum class Format { kAuto, kCsv, kTsv, kJson, kTable }; + + std::string command; + std::string file; + std::string device; + std::string table; + std::vector measurements; + long long limit = -1; + long long offset = 0; + long long start = LLONG_MIN; + long long end = LLONG_MAX; + bool has_start = false; + bool has_end = false; + long long seed = 0; + bool has_seed = false; + Format format = Format::kAuto; + bool no_header = false; + std::string model; + std::string output; + std::string columns; + bool verbose = false; + bool header_match = false; + bool help = false; + bool version = false; + std::string error; +}; + +ParsedArgs parse_args(const std::vector& args); + +} // namespace tsfile_cli + +#endif // TSFILE_CLI_CLI_ARGS_H diff --git a/cpp/tools/cli/exit_codes.h b/cpp/tools/cli/exit_codes.h new file mode 100644 index 000000000..0ab6dfdf5 --- /dev/null +++ b/cpp/tools/cli/exit_codes.h @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef TSFILE_CLI_EXIT_CODES_H +#define TSFILE_CLI_EXIT_CODES_H + +namespace tsfile_cli { + +constexpr int kExitOk = 0; +constexpr int kExitUsage = 1; +constexpr int kExitFile = 2; +constexpr int kExitRuntime = 3; + +} // namespace tsfile_cli + +#endif // TSFILE_CLI_EXIT_CODES_H diff --git a/cpp/tools/cli/run_cli.cc b/cpp/tools/cli/run_cli.cc new file mode 100644 index 000000000..85bfba8c6 --- /dev/null +++ b/cpp/tools/cli/run_cli.cc @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "cli/run_cli.h" + +#include +#include + +#include "cli/cli_args.h" +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "format/output_format.h" +#include "reader/tsfile_reader.h" + +#ifdef _WIN32 +#include +#define TSFILE_ISATTY _isatty +#define TSFILE_FILENO _fileno +#else +#include +#define TSFILE_ISATTY isatty +#define TSFILE_FILENO fileno +#endif + +#ifndef TSFILE_CLI_VERSION +#define TSFILE_CLI_VERSION "unknown" +#endif + +namespace tsfile_cli { +namespace { + +void print_usage(std::ostream& os) { + os << "Usage: tsfile-cli [options] \n" + "Commands:\n" + " ls list devices (tree) or tables (table)\n" + " schema per-measurement data type/encoding/compression\n" + " meta file metadata summary\n" + " stats per-series count, time range, " + "min/max/first/last/sum\n" + " head first N rows (use -n)\n" + " cat all rows of a device/table\n" + " count row count\n" + " sample deterministic sample rows (use -n and --seed)\n" + " write import CSV/TSV rows into a new table tsfile " + "(--table, --columns, -o)\n" + "Options: -f/--format csv|tsv|json|table, -d/--device, -t/--table,\n" + " -m/--measurements a,b, -n/--limit, --offset, --seed,\n" + " --start, --end,\n" + " --no-header, --model tree|table, -h/--help, --version\n" + "Write options: --table, --columns name:TYPE:tag|field,..., " + "-o/--output,\n" + " --header-match, -v/--verbose\n"; +} + +bool is_known_command(const std::string& c) { + static const std::set kCmds = {"ls", "schema", "meta", + "stats", "head", "cat", + "count", "sample", "write"}; + return kCmds.count(c) != 0; +} + +bool validate_command_flags(const ParsedArgs& p, std::ostream& err) { + if (p.has_seed && p.command != "sample") { + err << "Error: --seed is only valid for sample\n"; + return false; + } + if (p.command == "sample" && p.offset != 0) { + err << "Error: --offset is not valid for sample\n"; + return false; + } + if (!p.device.empty() && !p.table.empty()) { + err << "Error: --device and --table cannot be 
used together\n"; + return false; + } + if (p.limit < -1) { + err << "Error: --limit must be >= -1\n"; + return false; + } + if (p.offset < 0) { + err << "Error: --offset must be >= 0\n"; + return false; + } + if (p.has_start && p.has_end && p.start > p.end) { + err << "Error: --start must be <= --end\n"; + return false; + } + return true; +} + +bool validate_write_flags(const ParsedArgs& p, std::ostream& err) { + if (p.table.empty()) { + err << "Error: write requires --table\n"; + return false; + } + if (p.columns.empty()) { + err << "Error: write requires --columns\n"; + return false; + } + if (p.output.empty()) { + err << "Error: write requires -o/--output\n"; + return false; + } + if (p.format == ParsedArgs::Format::kJson || + p.format == ParsedArgs::Format::kTable) { + err << "Error: write input format must be csv or tsv\n"; + return false; + } + if (p.no_header && p.header_match) { + err << "Error: --header-match cannot be combined with --no-header\n"; + return false; + } + if (!p.measurements.empty() || !p.device.empty() || p.has_start || + p.has_end || p.has_seed || p.limit != -1 || p.offset != 0) { + err << "Error: read-only flags are not valid for write\n"; + return false; + } + return true; +} + +// Reject flags that have no effect for the given read command, instead of +// silently ignoring them, so misuse is caught rather than producing surprising +// output. Only called for non-write commands; write has its own validation. 
+bool validate_read_flag_applicability(const ParsedArgs& p, std::ostream& err) { + const std::string& c = p.command; + const bool is_row = (c == "head" || c == "cat" || c == "sample"); + const bool scoped = + is_row || c == "schema" || c == "stats" || c == "count"; + + if (!p.output.empty()) { + err << "Error: -o/--output is only valid for write\n"; + return false; + } + if (!p.columns.empty()) { + err << "Error: --columns is only valid for write\n"; + return false; + } + if (p.header_match) { + err << "Error: --header-match is only valid for write\n"; + return false; + } + if (p.verbose) { + err << "Error: -v/--verbose is only valid for write\n"; + return false; + } + if (!is_row && p.limit != -1) { + err << "Error: -n/--limit is only valid for head/cat/sample\n"; + return false; + } + if (!is_row && (p.has_start || p.has_end)) { + err << "Error: --start/--end are only valid for head/cat/sample\n"; + return false; + } + if (!scoped && !p.device.empty()) { + err << "Error: -d/--device is not valid for " << c << "\n"; + return false; + } + if (!scoped && !p.table.empty()) { + err << "Error: -t/--table is not valid for " << c << "\n"; + return false; + } + if (!scoped && !p.measurements.empty()) { + err << "Error: -m/--measurements is not valid for " << c << "\n"; + return false; + } + return true; +} + +} // namespace + +int run_cli(const std::vector& args, std::ostream& out, + std::ostream& err) { + ParsedArgs p = parse_args(args); + + if (p.version) { + out << "tsfile-cli (Apache TsFile C++) " << TSFILE_CLI_VERSION << "\n"; + return kExitOk; + } + if (args.empty()) { + print_usage(err); + return kExitUsage; + } + if (p.command == "help" || p.command == "--help" || p.command == "-h" || + p.help) { + print_usage(out); + return kExitOk; + } + if (!p.error.empty()) { + err << "Error: " << p.error << "\n"; + print_usage(err); + return kExitUsage; + } + if (!is_known_command(p.command)) { + err << "Unknown command: " << p.command << "\n"; + print_usage(err); + return 
kExitUsage; + } + if (p.command != "write" && p.file.empty()) { + err << "Error: missing argument\n"; + return kExitUsage; + } + if (!validate_command_flags(p, err)) { + print_usage(err); + return kExitUsage; + } + + if (p.command == "write") { + if (!validate_write_flags(p, err)) { + print_usage(err); + return kExitUsage; + } + storage::libtsfile_init(); + return cmd_write(p, out, err); + } + + if (!validate_read_flag_applicability(p, err)) { + print_usage(err); + return kExitUsage; + } + + storage::libtsfile_init(); + storage::TsFileReader reader; + int open_ret = reader.open(p.file); + if (open_ret != 0) { + err << "Error: cannot open or corrupted file: " << p.file << "\n"; + return kExitFile; + } + + bool stdout_tty = TSFILE_ISATTY(TSFILE_FILENO(stdout)) != 0; + OutputFormat fmt = resolve_format(p.format, stdout_tty); + + int code; + if (p.command == "ls") { + code = cmd_ls(p, reader, fmt, out, err); + } else if (p.command == "schema") { + code = cmd_schema(p, reader, fmt, out, err); + } else if (p.command == "meta") { + code = cmd_meta(p, reader, fmt, out, err); + } else if (p.command == "stats") { + code = cmd_stats(p, reader, fmt, out, err); + } else if (p.command == "head") { + code = cmd_head(p, reader, fmt, out, err); + } else if (p.command == "cat") { + code = cmd_cat(p, reader, fmt, out, err); + } else if (p.command == "count") { + code = cmd_count(p, reader, fmt, out, err); + } else if (p.command == "sample") { + code = cmd_sample(p, reader, fmt, out, err); + } else { + err << "Unknown command: " << p.command << "\n"; + code = kExitUsage; + } + + reader.close(); + return code; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/cli/run_cli.h b/cpp/tools/cli/run_cli.h new file mode 100644 index 000000000..79439d152 --- /dev/null +++ b/cpp/tools/cli/run_cli.h @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TSFILE_CLI_RUN_CLI_H +#define TSFILE_CLI_RUN_CLI_H + +#include +#include +#include + +namespace tsfile_cli { + +int run_cli(const std::vector& args, std::ostream& out, + std::ostream& err); + +} // namespace tsfile_cli + +#endif // TSFILE_CLI_RUN_CLI_H diff --git a/cpp/tools/commands/cmd_cat.cc b/cpp/tools/commands/cmd_cat.cc new file mode 100644 index 000000000..b1af65d98 --- /dev/null +++ b/cpp/tools/commands/cmd_cat.cc @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "commands/commands.h" + +namespace tsfile_cli { + +int cmd_cat(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& err) { + return run_row_query(args, reader, fmt, out, err, args.offset, args.limit); +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_count.cc b/cpp/tools/commands/cmd_count.cc new file mode 100644 index 000000000..9480744c6 --- /dev/null +++ b/cpp/tools/commands/cmd_count.cc @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include + +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "commands/statistics.h" + +namespace tsfile_cli { + +int cmd_count(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& /*err*/) { + RowWriter w(out, fmt, {"target", "measurement", "count"}, + {common::STRING, common::STRING, common::INT64}, + args.no_header); + + long long total = 0; + std::vector rows = collect_series_stats(args, reader); + for (const SeriesStatRow& row : rows) { + total += row.count; + w.write({row.target, row.measurement, std::to_string(row.count)}, + {false, false, false}); + } + w.write({"total", "", std::to_string(total)}, {false, true, false}); + w.finish(); + return kExitOk; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_head.cc b/cpp/tools/commands/cmd_head.cc new file mode 100644 index 000000000..06b01908f --- /dev/null +++ b/cpp/tools/commands/cmd_head.cc @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "commands/commands.h" + +namespace tsfile_cli { + +int cmd_head(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& err) { + long long limit = args.limit < 0 ? 10 : args.limit; + return run_row_query(args, reader, fmt, out, err, args.offset, limit); +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_ls.cc b/cpp/tools/commands/cmd_ls.cc new file mode 100644 index 000000000..675151e8a --- /dev/null +++ b/cpp/tools/commands/cmd_ls.cc @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include + +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "reader/tsfile_reader.h" + +namespace tsfile_cli { + +bool is_table_model(const ParsedArgs& args, storage::TsFileReader& reader) { + if (args.model == "tree") { + return false; + } + if (args.model == "table") { + return true; + } + return !reader.get_all_table_schemas().empty(); +} + +int cmd_ls(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& /*err*/) { + std::vector names; + if (is_table_model(args, reader)) { + for (auto& ts : reader.get_all_table_schemas()) { + if (ts) { + names.push_back(ts->get_table_name()); + } + } + } else { + for (auto& dev : reader.get_all_device_ids()) { + if (dev) { + names.push_back(dev->get_device_name()); + } + } + } + + RowWriter w(out, fmt, {"name"}, {common::STRING}, args.no_header); + for (const std::string& n : names) { + w.write({n}, {false}); + } + w.finish(); + return kExitOk; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_meta.cc b/cpp/tools/commands/cmd_meta.cc new file mode 100644 index 000000000..dd70029f3 --- /dev/null +++ b/cpp/tools/commands/cmd_meta.cc @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "commands/statistics.h" +#include "reader/tsfile_reader.h" + +namespace tsfile_cli { + +int cmd_meta(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& /*err*/) { + RowWriter w(out, fmt, + {"file", "model", "device_count", "table_count", "series_count", + "start_time", "end_time", "file_size_bytes"}, + {common::STRING, common::STRING, common::INT64, common::INT64, + common::INT64, common::INT64, common::INT64, common::INT64}, + args.no_header); + + FileSummary s = collect_file_summary(args, reader); + w.write({s.file, s.model, std::to_string(s.device_count), + std::to_string(s.table_count), std::to_string(s.series_count), + s.has_time_range ? std::to_string(s.start_time) : "", + s.has_time_range ? std::to_string(s.end_time) : "", + std::to_string(s.file_size_bytes)}, + {false, false, false, false, false, !s.has_time_range, + !s.has_time_range, false}); + w.finish(); + return kExitOk; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_sample.cc b/cpp/tools/commands/cmd_sample.cc new file mode 100644 index 000000000..744dd6577 --- /dev/null +++ b/cpp/tools/commands/cmd_sample.cc @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include + +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "common/device_id.h" +#include "common/schema.h" +#include "format/result_set_format.h" +#include "reader/tsfile_reader.h" + +namespace tsfile_cli { + +int cmd_sample(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& err) { + const int64_t start = args.has_start ? static_cast(args.start) + : std::numeric_limits::min(); + const int64_t end = args.has_end ? static_cast(args.end) + : std::numeric_limits::max(); + storage::ResultSet* rs = nullptr; + int qret = 0; + + if (is_table_model(args, reader)) { + std::string table_name = args.table; + if (table_name.empty()) { + auto schemas = reader.get_all_table_schemas(); + if (schemas.empty() || !schemas[0]) { + err << "Error: no table found in file\n"; + return kExitRuntime; + } + table_name = schemas[0]->get_table_name(); + } + std::vector cols = args.measurements; + if (cols.empty()) { + auto ts = reader.get_table_schema(table_name); + if (ts) { + cols = ts->get_measurement_names(); + } + } + qret = reader.query(table_name, cols, start, end, rs); + } else { + std::vector devices; + if (!args.device.empty()) { + devices.push_back(args.device); + } else { + for (auto& d : reader.get_all_device_ids()) { + if (d) { + devices.push_back(d->get_device_name()); + } + } + } + std::vector paths; + for (const std::string& dev : devices) { + std::vector ms = args.measurements; + if (ms.empty()) { + auto did = std::make_shared(dev); + 
std::vector sch; + if (reader.get_timeseries_schema(did, sch) == 0) { + for (auto& m : sch) { + ms.push_back(m.measurement_name_); + } + } + } + for (const std::string& m : ms) { + paths.push_back(dev + "." + m); + } + } + if (paths.empty()) { + err << "Error: no time series found\n"; + return kExitRuntime; + } + qret = reader.query(paths, start, end, rs); + } + + if (qret != 0 || rs == nullptr) { + err << "Error: query failed: " << error_code_message(qret) << "\n"; + if (rs != nullptr) { + reader.destroy_query_data_set(rs); + } + return kExitRuntime; + } + + const long long limit = args.limit < 0 ? 10 : args.limit; + const unsigned long long seed = + args.has_seed ? static_cast(args.seed) : 0ULL; + int wret = + emit_result_set_sampled(rs, fmt, args.no_header, out, limit, seed); + reader.destroy_query_data_set(rs); + if (wret != 0) { + err << "Error: failed to read rows: " << error_code_message(wret) + << "\n"; + return kExitRuntime; + } + return kExitOk; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_schema.cc b/cpp/tools/commands/cmd_schema.cc new file mode 100644 index 000000000..7ca5c5e05 --- /dev/null +++ b/cpp/tools/commands/cmd_schema.cc @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include + +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "common/schema.h" +#include "reader/tsfile_reader.h" + +namespace tsfile_cli { +namespace { + +void write_table_schema_rows(const ParsedArgs& args, + storage::TsFileReader& reader, RowWriter& w) { + auto schemas = reader.get_all_table_schemas(); + for (auto& schema : schemas) { + if (!schema) { + continue; + } + if (!args.table.empty() && schema->get_table_name() != args.table) { + continue; + } + for (const auto& ms : schema->get_measurement_schemas()) { + if (!ms) { + continue; + } + const std::string& name = ms->measurement_name_; + if (!args.measurements.empty() && + std::find(args.measurements.begin(), args.measurements.end(), + name) == args.measurements.end()) { + continue; + } + w.write({schema->get_table_name(), name, + tsdatatype_name(ms->data_type_), + tsencoding_name(ms->encoding_), + compression_name(ms->compression_type_)}, + {false, false, false, false, false}); + } + } +} + +} // namespace + +int cmd_schema(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& /*err*/) { + RowWriter w( + out, fmt, + {"target", "measurement", "datatype", "encoding", "compression"}, + {common::STRING, common::STRING, common::STRING, common::STRING, + common::STRING}, + args.no_header); + + if (is_table_model(args, reader)) { + write_table_schema_rows(args, reader, w); + w.finish(); + return kExitOk; + } + + storage::DeviceTimeseriesMetadataMap meta = + reader.get_timeseries_metadata(); + for (auto& kv : meta) { + std::string target = kv.first ? 
kv.first->get_device_name() : ""; + if (!args.device.empty() && target != args.device) { + continue; + } + + std::map> enc_comp; + if (kv.first) { + std::vector ms; + if (reader.get_timeseries_schema(kv.first, ms) == 0) { + for (auto& m : ms) { + enc_comp[m.measurement_name_] = + std::make_pair(tsencoding_name(m.encoding_), + compression_name(m.compression_type_)); + } + } + } + + for (auto& ts : kv.second) { + if (!ts) { + continue; + } + std::string m = ts->get_measurement_name().to_std_string(); + if (!args.measurements.empty() && + std::find(args.measurements.begin(), args.measurements.end(), + m) == args.measurements.end()) { + continue; + } + std::string enc; + std::string comp; + auto it = enc_comp.find(m); + if (it != enc_comp.end()) { + enc = it->second.first; + comp = it->second.second; + } + w.write( + {target, m, tsdatatype_name(ts->get_data_type()), enc, comp}, + {false, false, false, enc.empty(), comp.empty()}); + } + } + w.finish(); + return kExitOk; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_stats.cc b/cpp/tools/commands/cmd_stats.cc new file mode 100644 index 000000000..898ca469b --- /dev/null +++ b/cpp/tools/commands/cmd_stats.cc @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "commands/statistics.h" + +namespace tsfile_cli { + +int cmd_stats(const ParsedArgs& args, storage::TsFileReader& reader, + OutputFormat fmt, std::ostream& out, std::ostream& /*err*/) { + RowWriter w(out, fmt, + {"target", "measurement", "count", "start_time", "end_time", + "min", "max", "first", "last", "sum"}, + {common::STRING, common::STRING, common::INT64, common::INT64, + common::INT64, common::STRING, common::STRING, common::STRING, + common::STRING, common::STRING}, + args.no_header); + + std::vector rows = collect_series_stats(args, reader); + for (const SeriesStatRow& row : rows) { + std::vector cells = { + row.target, row.measurement, std::to_string(row.count), + std::to_string(row.start_time), std::to_string(row.end_time)}; + cells.insert(cells.end(), row.value_cells.values.begin(), + row.value_cells.values.end()); + + std::vector nulls = {false, false, false, row.count == 0, + row.count == 0}; + nulls.insert(nulls.end(), row.value_cells.is_null.begin(), + row.value_cells.is_null.end()); + w.write(cells, nulls); + } + w.finish(); + return kExitOk; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/cmd_write.cc b/cpp/tools/commands/cmd_write.cc new file mode 100644 index 000000000..a9ce8d81f --- /dev/null +++ b/cpp/tools/commands/cmd_write.cc @@ -0,0 +1,354 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cli/cli_args.h" +#include "cli/exit_codes.h" +#include "commands/commands.h" +#include "common/schema.h" +#include "common/tablet.h" +#include "file/write_file.h" +#include "format/input_format.h" +#include "writer/tsfile_table_writer.h" + +namespace tsfile_cli { +namespace { + +struct DataRow { + long long line_no; + int64_t timestamp; + std::vector cells; +}; + +void strip_cr(std::string& s) { + if (!s.empty() && s.back() == '\r') { + s.pop_back(); + } +} + +bool add_typed_value(storage::Tablet& tablet, uint32_t row, + const ColumnDef& def, const std::string& cell, + std::string& error) { + if (cell.empty()) { + return true; // null + } + char* e = nullptr; + switch (def.type) { + case common::BOOLEAN: { + bool v = false; + if (!parse_bool_cell(cell, v)) { + error = "bad BOOLEAN '" + cell + "'"; + return false; + } + tablet.add_value(row, def.name, v); + return true; + } + case common::INT32: { + errno = 0; + long long v = std::strtoll(cell.c_str(), &e, 10); + if (e == nullptr || *e != '\0') { + error = "bad INT32 '" + cell + "'"; + return false; + } + if (errno == ERANGE || v < INT32_MIN || v > INT32_MAX) { + error = "INT32 out of range '" + cell + "'"; + return false; + } + tablet.add_value(row, def.name, static_cast(v)); + return true; + } + case common::INT64: { + errno = 0; + long long v = std::strtoll(cell.c_str(), &e, 10); + if (e == nullptr || *e != '\0') { + error = "bad INT64 '" + cell + "'"; + return 
false; + } + if (errno == ERANGE) { + error = "INT64 out of range '" + cell + "'"; + return false; + } + tablet.add_value(row, def.name, static_cast(v)); + return true; + } + case common::FLOAT: { + errno = 0; + float v = std::strtof(cell.c_str(), &e); + if (e == nullptr || *e != '\0') { + error = "bad FLOAT '" + cell + "'"; + return false; + } + if (errno == ERANGE) { + error = "FLOAT out of range '" + cell + "'"; + return false; + } + tablet.add_value(row, def.name, v); + return true; + } + case common::DOUBLE: { + errno = 0; + double v = std::strtod(cell.c_str(), &e); + if (e == nullptr || *e != '\0') { + error = "bad DOUBLE '" + cell + "'"; + return false; + } + if (errno == ERANGE) { + error = "DOUBLE out of range '" + cell + "'"; + return false; + } + tablet.add_value(row, def.name, v); + return true; + } + case common::STRING: + case common::TEXT: { + tablet.add_value(row, def.name, cell); + return true; + } + default: + error = "unsupported column type"; + return false; + } +} + +} // namespace + +int cmd_write(const ParsedArgs& args, std::ostream& /*out*/, + std::ostream& err) { + std::vector columns; + std::string perr; + if (!parse_columns_spec(args.columns, columns, perr)) { + err << "Error: " << perr << "\n"; + return kExitUsage; + } + + std::istream* in = &std::cin; + std::ifstream fin; + if (!args.file.empty() && args.file != "-") { + fin.open(args.file.c_str()); + if (!fin.is_open()) { + err << "Error: cannot open input: " << args.file << "\n"; + return kExitFile; + } + in = &fin; + } + + const char delim = (args.format == ParsedArgs::Format::kTsv) ? 
'\t' : ','; + const bool csv_quotes = (delim == ','); + + std::string line; + long long line_no = 0; + if (!args.no_header) { + if (std::getline(*in, line)) { + ++line_no; + strip_cr(line); + if (args.header_match) { + std::vector h = + split_line(line, delim, csv_quotes); + bool ok = (h.size() == columns.size() + 1); + for (size_t i = 0; ok && i < columns.size(); ++i) { + if (h[i + 1] != columns[i].name) { + ok = false; + } + } + if (!ok) { + err << "Error: header does not match --columns (line 1)\n"; + return kExitRuntime; + } + } + } + } + + std::vector names; + std::vector types; + std::vector cats; + std::vector col_schemas; + std::vector tag_idx; + for (size_t j = 0; j < columns.size(); ++j) { + const ColumnDef& d = columns[j]; + names.push_back(d.name); + types.push_back(d.type); + cats.push_back(d.category); + col_schemas.push_back(common::ColumnSchema( + d.name, d.type, common::UNCOMPRESSED, common::PLAIN, d.category)); + if (d.category == common::ColumnCategory::TAG) { + tag_idx.push_back(j); + } + } + + // Creating the output truncates it; refuse to clobber the input we are + // still reading from, which would otherwise silently destroy the source + // data. + if (!args.file.empty() && args.file != "-" && args.output == args.file) { + err << "Error: --output is the same as the input file: " << args.output + << "\n"; + return kExitUsage; + } + + storage::WriteFile file; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + if (file.create(args.output, flags, 0666) != 0) { + err << "Error: cannot create output: " << args.output << "\n"; + return kExitFile; + } + auto* schema = new storage::TableSchema(args.table, col_schemas); + auto* writer = new storage::TsFileTableWriter(&file, schema); + + // Stream rows into fixed-size batches so memory stays bounded regardless of + // input size; a full file is never buffered in memory. 
+ const size_t kBatch = 1024; + int rc = kExitOk; + long long total_rows = 0; + std::vector batch; + batch.reserve(kBatch); + // The table writer requires strictly increasing timestamps per device, and + // a device is identified by its tag-column values. Track the last timestamp + // seen for each device so out-of-order input is rejected with a clear, + // located message instead of an opaque write failure. + std::unordered_map last_ts_by_device; + + auto flush_batch = [&]() -> bool { + if (batch.empty()) { + return true; + } + storage::Tablet tablet(args.table, names, types, cats, + static_cast(batch.size())); + for (size_t i = 0; i < batch.size(); ++i) { + uint32_t r = static_cast(i); + tablet.add_timestamp(r, batch[i].timestamp); + for (size_t j = 0; j < columns.size(); ++j) { + std::string cell_err; + if (!add_typed_value(tablet, r, columns[j], batch[i].cells[j], + cell_err)) { + err << "Error: " << cell_err << " (line " + << batch[i].line_no << ")\n"; + return false; + } + } + } + int wt = writer->write_table(tablet); + if (wt != 0) { + err << "Error: failed to write rows: " << error_code_message(wt) + << "\n"; + return false; + } + total_rows += static_cast(batch.size()); + batch.clear(); + return true; + }; + + while (std::getline(*in, line)) { + ++line_no; + strip_cr(line); + if (line.empty()) { + continue; + } + std::vector fields = split_line(line, delim, csv_quotes); + if (fields.size() != columns.size() + 1) { + err << "Error: expected " << (columns.size() + 1) << " fields, got " + << fields.size() << " (line " << line_no << ")\n"; + rc = kExitRuntime; + break; + } + char* e = nullptr; + errno = 0; + long long ts = std::strtoll(fields[0].c_str(), &e, 10); + if (e == nullptr || *e != '\0' || errno == ERANGE) { + err << "Error: bad timestamp '" << fields[0] << "' (line " + << line_no << ")\n"; + rc = kExitRuntime; + break; + } + DataRow r; + r.line_no = line_no; + r.timestamp = static_cast(ts); + r.cells.assign(fields.begin() + 1, fields.end()); + + 
std::string device_key; + for (size_t k : tag_idx) { + device_key += r.cells[k]; + device_key.push_back('\0'); + } + auto seen = last_ts_by_device.find(device_key); + if (seen != last_ts_by_device.end() && r.timestamp <= seen->second) { + err << "Error: timestamps must be strictly increasing per device " + "(line " + << line_no << ": " << r.timestamp << " <= previous " + << seen->second << ")\n"; + rc = kExitRuntime; + break; + } + last_ts_by_device[device_key] = r.timestamp; + + batch.push_back(r); + if (batch.size() >= kBatch && !flush_batch()) { + rc = kExitRuntime; + break; + } + } + + if (rc == kExitOk && !flush_batch()) { + rc = kExitRuntime; + } + + if (rc == kExitOk) { + int fr = writer->flush(); + if (fr != 0) { + err << "Error: failed to flush output: " << error_code_message(fr) + << "\n"; + rc = kExitRuntime; + } else { + int cr = writer->close(); + if (cr != 0) { + err << "Error: failed to close output: " + << error_code_message(cr) << "\n"; + rc = kExitRuntime; + } + } + } else { + writer->close(); + } + delete writer; + delete schema; + + if (rc != kExitOk) { + // The import failed; do not leave a partial/corrupt .tsfile behind. + file.close(); + std::remove(args.output.c_str()); + } else if (args.verbose) { + err << "wrote " << total_rows << " rows to " << args.output << "\n"; + } + return rc; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/commands/commands.h b/cpp/tools/commands/commands.h new file mode 100644 index 000000000..085a54822 --- /dev/null +++ b/cpp/tools/commands/commands.h @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TSFILE_CLI_COMMANDS_H
+#define TSFILE_CLI_COMMANDS_H
+
+// NOTE(review): the include target below is missing from this listing (the
+// angle-bracket part was lost) - restore it from the original commit.
+#include
+
+#include "cli/cli_args.h"
+#include "format/output_format.h"
+
+// Forward declaration only: every function below takes the reader by
+// reference, so this header does not need the reader's full definition.
+namespace storage {
+class TsFileReader;
+}  // namespace storage
+
+namespace tsfile_cli {
+
+// Callers in this tool use the result to choose between the table-model and
+// the tree-model code path for the opened file.
+bool is_table_model(const ParsedArgs& args, storage::TsFileReader& reader);
+
+// Shared row-query helper: queries `reader` as selected by `args`, writes the
+// rows to `out` in format `fmt` (skipping `offset` rows, stopping after
+// `limit` rows; a negative limit means no limit), reports failures on `err`
+// and returns a process exit code.
+int run_row_query(const ParsedArgs& args, storage::TsFileReader& reader,
+                  OutputFormat fmt, std::ostream& out, std::ostream& err,
+                  long long offset, long long limit);
+
+// One entry point per sub-command. Each receives the parsed arguments, a
+// stream for results (`out`) and a stream for diagnostics (`err`).
+// NOTE(review): the int result is presumably a process exit code, as it is
+// for run_row_query - confirm against the dispatcher.
+int cmd_ls(const ParsedArgs& args, storage::TsFileReader& reader,
+           OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_schema(const ParsedArgs& args, storage::TsFileReader& reader,
+               OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_meta(const ParsedArgs& args, storage::TsFileReader& reader,
+             OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_count(const ParsedArgs& args, storage::TsFileReader& reader,
+              OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_stats(const ParsedArgs& args, storage::TsFileReader& reader,
+              OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_head(const ParsedArgs& args, storage::TsFileReader& reader,
+             OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_cat(const ParsedArgs& args, storage::TsFileReader& reader,
+            OutputFormat fmt, std::ostream& out, std::ostream& err);
+int cmd_sample(const ParsedArgs& args, storage::TsFileReader& reader,
+               OutputFormat fmt, std::ostream& out, std::ostream& err);
+// Unlike the commands above, cmd_write takes no reader and no output format.
+int cmd_write(const ParsedArgs& args, std::ostream&
out, std::ostream& err);
+
+}  // namespace tsfile_cli
+
+#endif  // TSFILE_CLI_COMMANDS_H
diff --git a/cpp/tools/commands/row_query.cc b/cpp/tools/commands/row_query.cc
new file mode 100644
index 000000000..2ea247ad3
--- /dev/null
+++ b/cpp/tools/commands/row_query.cc
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// NOTE(review): the four include targets below, and the template arguments of
+// static_cast / std::numeric_limits / std::vector / std::make_shared further
+// down, are missing from this listing - restore them from the original commit.
+#include
+#include
+#include
+#include
+
+#include "cli/exit_codes.h"
+#include "commands/commands.h"
+#include "common/device_id.h"
+#include "common/schema.h"
+#include "format/result_set_format.h"
+#include "reader/tsfile_reader.h"
+
+namespace tsfile_cli {
+
+// Runs one time-range query against `reader` and writes the rows to `out`.
+//
+// The time range comes from args.start / args.end when the matching has_*
+// flag is set, otherwise from the numeric_limits min / max.
+//   - Table model: queries args.table (or the first table schema in the file
+//     when no table is given) for args.measurements (or every measurement
+//     name of that table's schema when none are given).
+//   - Tree model: builds "<device>.<measurement>" paths for args.device (or
+//     every device in the file) and args.measurements (or the measurements
+//     reported by get_timeseries_schema for that device).
+//
+// `offset` and `limit` are forwarded unchanged to emit_result_set.
+// Returns kExitOk on success; on any failure a message is written to `err`
+// and kExitRuntime is returned.
+int run_row_query(const ParsedArgs& args, storage::TsFileReader& reader,
+                  OutputFormat fmt, std::ostream& out, std::ostream& err,
+                  long long offset, long long limit) {
+    const int64_t start = args.has_start ? static_cast(args.start)
+                                         : std::numeric_limits::min();
+    const int64_t end = args.has_end ? static_cast(args.end)
+                                     : std::numeric_limits::max();
+
+    storage::ResultSet* rs = nullptr;
+    int qret = 0;
+
+    if (is_table_model(args, reader)) {
+        std::string table_name = args.table;
+        if (table_name.empty()) {
+            // No table requested: fall back to the first schema in the file.
+            auto schemas = reader.get_all_table_schemas();
+            if (schemas.empty() || !schemas[0]) {
+                err << "Error: no table found in file\n";
+                return kExitRuntime;
+            }
+            table_name = schemas[0]->get_table_name();
+        }
+        std::vector cols = args.measurements;
+        if (cols.empty()) {
+            // A missing schema is not reported here; `cols` stays empty and
+            // the query call below decides the outcome.
+            auto ts = reader.get_table_schema(table_name);
+            if (ts) {
+                cols = ts->get_measurement_names();
+            }
+        }
+        qret = reader.query(table_name, cols, start, end, rs);
+    } else {
+        std::vector devices;
+        if (!args.device.empty()) {
+            devices.push_back(args.device);
+        } else {
+            for (auto& d : reader.get_all_device_ids()) {
+                if (d) {
+                    devices.push_back(d->get_device_name());
+                }
+            }
+        }
+
+        std::vector paths;
+        for (const std::string& dev : devices) {
+            std::vector ms = args.measurements;
+            if (ms.empty()) {
+                auto did = std::make_shared(dev);
+                std::vector sch;
+                // A failed schema lookup is ignored: the device simply
+                // contributes no paths.
+                if (reader.get_timeseries_schema(did, sch) == 0) {
+                    for (auto& m : sch) {
+                        ms.push_back(m.measurement_name_);
+                    }
+                }
+            }
+            for (const std::string& m : ms) {
+                paths.push_back(dev + "." + m);
+            }
+        }
+        if (paths.empty()) {
+            err << "Error: no time series found\n";
+            return kExitRuntime;
+        }
+        qret = reader.query(paths, start, end, rs);
+    }
+
+    if (qret != 0 || rs == nullptr) {
+        err << "Error: query failed: " << error_code_message(qret) << "\n";
+        // The result set may have been allocated even though the query
+        // reported an error, so release it before returning.
+        if (rs != nullptr) {
+            reader.destroy_query_data_set(rs);
+        }
+        return kExitRuntime;
+    }
+
+    int wret = emit_result_set(rs, fmt, args.no_header, out, offset, limit);
+    reader.destroy_query_data_set(rs);
+    if (wret != 0) {
+        err << "Error: failed to read rows: " << error_code_message(wret)
+            << "\n";
+        return kExitRuntime;
+    }
+    return kExitOk;
+}
+
+}  // namespace tsfile_cli
diff --git a/cpp/tools/commands/statistics.cc b/cpp/tools/commands/statistics.cc
new file mode 100644
index 000000000..e9bb8d6e5
--- /dev/null
+++ b/cpp/tools/commands/statistics.cc
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "commands/statistics.h"
+
+// NOTE(review): the four include targets below, the template parameter list
+// of value_to_string and every static_cast / std::vector /
+// std::numeric_limits template argument in this file are missing from this
+// listing - restore them from the original commit.
+#include
+#include
+#include
+#include
+
+#include "commands/commands.h"
+#include "common/statistic.h"
+#include "reader/tsfile_reader.h"
+
+namespace tsfile_cli {
+namespace {
+
+// Formats `value` with operator<< on a default-configured string stream.
+template
+std::string value_to_string(T value) {
+    std::ostringstream ss;
+    ss << value;
+    return ss.str();
+}
+
+// Renders a bool as the text "true" or "false".
+std::string bool_to_string(bool value) { return value ? "true" : "false"; }
+
+// Converts the storage layer's string type to std::string.
+std::string string_to_std(const common::String& value) {
+    return value.to_std_string();
+}
+
+// Returns the stream position reported after opening `path` at its end,
+// or 0 when the file cannot be opened.
+long long file_size(const std::string& path) {
+    std::ifstream in(path.c_str(), std::ios::binary | std::ios::ate);
+    if (!in.good()) {
+        return 0;
+    }
+    return static_cast(in.tellg());
+}
+
+}  // namespace
+
+// Renders the value statistics of `st` as five text cells in the fixed order
+// {min, max, first, last, sum}; is_null[i] marks a slot the type does not
+// provide.
+//   - null `st` or a count of 0: all five slots are null.
+//   - BOOLEAN: first, last, sum.
+//   - INT32/DATE, INT64/TIMESTAMP, FLOAT, DOUBLE: all five.
+//   - STRING: min, max, first, last.
+//   - TEXT: first, last.
+//   - any other type: all five slots are null.
+StatisticCells statistic_value_cells(storage::Statistic* st) {
+    StatisticCells cells;
+    cells.values.assign(5, "");
+    cells.is_null.assign(5, true);
+    if (st == nullptr || st->get_count() == 0) {
+        return cells;
+    }
+
+    switch (st->get_type()) {
+        case common::BOOLEAN: {
+            auto* s = static_cast(st);
+            cells.values = {"", "", bool_to_string(s->first_value_),
+                            bool_to_string(s->last_value_),
+                            value_to_string(s->sum_value_)};
+            cells.is_null = {true, true, false, false, false};
+            break;
+        }
+        case common::INT32:
+        case common::DATE: {
+            auto* s = static_cast(st);
+            cells.values = {value_to_string(s->min_value_),
+                            value_to_string(s->max_value_),
+                            value_to_string(s->first_value_),
+                            value_to_string(s->last_value_),
+                            value_to_string(s->sum_value_)};
+            cells.is_null = {false, false, false, false, false};
+            break;
+        }
+        case common::INT64:
+        case common::TIMESTAMP: {
+            auto* s = static_cast(st);
+            cells.values = {value_to_string(s->min_value_),
+                            value_to_string(s->max_value_),
+                            value_to_string(s->first_value_),
+                            value_to_string(s->last_value_),
+                            value_to_string(s->sum_value_)};
+            cells.is_null = {false, false, false, false, false};
+            break;
+        }
+        case common::FLOAT: {
+            auto* s = static_cast(st);
+            cells.values = {value_to_string(s->min_value_),
+                            value_to_string(s->max_value_),
+                            value_to_string(s->first_value_),
+                            value_to_string(s->last_value_),
+                            value_to_string(s->sum_value_)};
+            cells.is_null = {false, false, false, false, false};
+            break;
+        }
+        case common::DOUBLE: {
+            auto* s = static_cast(st);
+            cells.values = {value_to_string(s->min_value_),
+                            value_to_string(s->max_value_),
+                            value_to_string(s->first_value_),
+                            value_to_string(s->last_value_),
+                            value_to_string(s->sum_value_)};
+            cells.is_null = {false, false, false, false, false};
+            break;
+        }
+        case common::STRING: {
+            auto* s = static_cast(st);
+            cells.values = {string_to_std(s->min_value_),
+                            string_to_std(s->max_value_),
+                            string_to_std(s->first_value_),
+                            string_to_std(s->last_value_), ""};
+            cells.is_null = {false, false, false, false, true};
+            break;
+        }
+        case common::TEXT: {
+            auto* s = static_cast(st);
+            cells.values = {"", "", string_to_std(s->first_value_),
+                            string_to_std(s->last_value_), ""};
+            cells.is_null = {true, true, false, false, true};
+            break;
+        }
+        default:
+            break;
+    }
+    return cells;
+}
+
+// Builds one SeriesStatRow per time series reported by
+// reader.get_timeseries_metadata(), keeping only series that pass the
+// optional filters in `args`: device name (args.device), table name
+// (args.table) and measurement name (args.measurements). An empty filter
+// matches everything.
+std::vector collect_series_stats(const ParsedArgs& args,
+                                 storage::TsFileReader& reader) {
+    std::vector rows;
+    storage::DeviceTimeseriesMetadataMap meta =
+        reader.get_timeseries_metadata();
+    for (auto& kv : meta) {
+        // A null device id is reported with an empty target name.
+        std::string target = kv.first ? kv.first->get_device_name() : "";
+        if (!args.device.empty() && target != args.device) {
+            continue;
+        }
+        // The table filter is only applied when the device id is non-null.
+        if (!args.table.empty() && kv.first &&
+            kv.first->get_table_name() != args.table) {
+            continue;
+        }
+        for (auto& ts : kv.second) {
+            if (!ts) {
+                continue;
+            }
+            std::string measurement =
+                ts->get_measurement_name().to_std_string();
+            if (!args.measurements.empty() &&
+                std::find(args.measurements.begin(), args.measurements.end(),
+                          measurement) == args.measurements.end()) {
+                continue;
+            }
+            storage::Statistic* st = ts->get_statistic();
+            SeriesStatRow row;
+            row.target = target;
+            row.measurement = measurement;
+            if (st != nullptr) {
+                row.count = st->get_count();
+                row.start_time = st->start_time_;
+                row.end_time = st->end_time_;
+                row.value_cells = statistic_value_cells(st);
+            } else {
+                // No statistic: keep the default count/time values and mark
+                // all five value cells as null.
+                row.value_cells.values.assign(5, "");
+                row.value_cells.is_null.assign(5, true);
+            }
+            rows.push_back(row);
+        }
+    }
+    return rows;
+}
+
+// Summarises the whole file: model ("table" or "tree"), device / table /
+// series counts, file size, and the overall time range. The device, table
+// and measurement filters in `args` are cleared first, so the series count
+// and time range always cover every series. has_time_range stays false when
+// no series has a positive count.
+FileSummary collect_file_summary(const ParsedArgs& args,
+                                 storage::TsFileReader& reader) {
+    FileSummary s;
+    s.file = args.file;
+    s.model = is_table_model(args, reader) ? "table" : "tree";
+    s.device_count = static_cast(reader.get_all_device_ids().size());
+    s.table_count =
+        static_cast(reader.get_all_table_schemas().size());
+    s.file_size_bytes = file_size(args.file);
+
+    ParsedArgs all = args;
+    all.device.clear();
+    all.table.clear();
+    all.measurements.clear();
+    std::vector rows = collect_series_stats(all, reader);
+    s.series_count = static_cast(rows.size());
+    long long min_start = std::numeric_limits::max();
+    long long max_end = std::numeric_limits::min();
+    for (const SeriesStatRow& row : rows) {
+        // Series without data would otherwise contribute their default
+        // start/end time of 0 to the range.
+        if (row.count <= 0) {
+            continue;
+        }
+        min_start = std::min(min_start, row.start_time);
+        max_end = std::max(max_end, row.end_time);
+        s.has_time_range = true;
+    }
+    if (s.has_time_range) {
+        s.start_time = min_start;
+        s.end_time = max_end;
+    }
+    return s;
+}
+
+}  // namespace tsfile_cli
diff --git a/cpp/tools/commands/statistics.h b/cpp/tools/commands/statistics.h
new file mode 100644
index 000000000..031b5b4aa
--- /dev/null
+++ b/cpp/tools/commands/statistics.h
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TSFILE_CLI_STATISTICS_H
+#define TSFILE_CLI_STATISTICS_H
+
+// NOTE(review): the two include targets below and the element types of the
+// std::vector members / return type are missing from this listing - restore
+// them from the original commit.
+#include
+#include
+
+#include "cli/cli_args.h"
+
+namespace storage {
+class Statistic;
+class TsFileReader;
+}  // namespace storage
+
+namespace tsfile_cli {
+
+// Text rendering of one series' value statistics. statistic_value_cells
+// fills both vectors with five entries in the order
+// {min, max, first, last, sum}; is_null[i] marks a slot without a value.
+struct StatisticCells {
+    std::vector values;
+    std::vector is_null;
+};
+
+// Statistics of a single time series.
+struct SeriesStatRow {
+    std::string target;       // device name ("" when the device id is null)
+    std::string measurement;  // measurement name
+    long long count = 0;      // statistic count; stays 0 without a statistic
+    long long start_time = 0;
+    long long end_time = 0;
+    StatisticCells value_cells;
+};
+
+// File-level summary produced by collect_file_summary.
+struct FileSummary {
+    std::string file;   // path taken from the parsed arguments
+    std::string model;  // "table" or "tree"
+    long long device_count = 0;
+    long long table_count = 0;
+    long long series_count = 0;
+    long long start_time = 0;     // only meaningful when has_time_range
+    long long end_time = 0;       // only meaningful when has_time_range
+    bool has_time_range = false;  // true once a series with count > 0 is seen
+    long long file_size_bytes = 0;
+};
+
+StatisticCells statistic_value_cells(storage::Statistic* st);
+std::vector collect_series_stats(const ParsedArgs& args,
+                                 storage::TsFileReader& reader);
+FileSummary collect_file_summary(const ParsedArgs& args,
+                                 storage::TsFileReader& reader);
+
+}  // namespace tsfile_cli
+
+#endif  // TSFILE_CLI_STATISTICS_H
diff --git a/cpp/tools/format/input_format.cc b/cpp/tools/format/input_format.cc
new file mode 100644
index 000000000..bb04a202a
--- /dev/null
+++ b/cpp/tools/format/input_format.cc
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "format/input_format.h" + +#include + +namespace tsfile_cli { + +bool parse_datatype_name(const std::string& s, common::TSDataType& out) { + std::string u; + u.reserve(s.size()); + for (char c : s) { + u += static_cast(std::toupper(static_cast(c))); + } + if (u == "BOOLEAN") { + out = common::BOOLEAN; + } else if (u == "INT32") { + out = common::INT32; + } else if (u == "INT64") { + out = common::INT64; + } else if (u == "FLOAT") { + out = common::FLOAT; + } else if (u == "DOUBLE") { + out = common::DOUBLE; + } else if (u == "STRING") { + out = common::STRING; + } else if (u == "TEXT") { + out = common::TEXT; + } else { + return false; + } + return true; +} + +bool parse_category(const std::string& s, common::ColumnCategory& out) { + if (s == "tag") { + out = common::ColumnCategory::TAG; + } else if (s == "field") { + out = common::ColumnCategory::FIELD; + } else { + return false; + } + return true; +} + +std::vector split_line(const std::string& line, char delim, + bool csv_quotes) { + std::vector out; + std::string field; + if (!csv_quotes) { + for (char c : line) { + if (c == delim) { + out.push_back(field); + field.clear(); + } else { + field += c; + } + } + out.push_back(field); + return out; + } + bool in_quotes = false; + for (size_t i = 0; i < line.size(); ++i) { + char c = line[i]; + if (in_quotes) { + if (c == '"') { + if (i + 1 < line.size() && line[i + 1] == '"') { + field += '"'; + ++i; + } else { + in_quotes = false; + } + } else { + field += c; + } + } else if (c == '"') { + in_quotes = true; + } else if (c == delim) { + out.push_back(field); + field.clear(); + } else { + field += c; + } + } + out.push_back(field); + return out; +} + +bool parse_columns_spec(const std::string& spec, std::vector& out, + std::string& error) { + out.clear(); + if (spec.empty()) { + error = "empty --columns"; + return false; + } + std::vector items 
= split_line(spec, ',', false); + for (const std::string& item : items) { + std::vector parts = split_line(item, ':', false); + if (parts.size() != 3) { + error = "bad column '" + item + "' (want name:TYPE:category)"; + return false; + } + ColumnDef def; + def.name = parts[0]; + if (def.name.empty()) { + error = "empty column name in '" + item + "'"; + return false; + } + if (!parse_datatype_name(parts[1], def.type)) { + error = "unknown type '" + parts[1] + "'"; + return false; + } + if (!parse_category(parts[2], def.category)) { + error = "bad category '" + parts[2] + "' (want tag|field)"; + return false; + } + for (const ColumnDef& prev : out) { + if (prev.name == def.name) { + error = "duplicate column name '" + def.name + "'"; + return false; + } + } + out.push_back(def); + } + return true; +} + +bool parse_bool_cell(const std::string& s, bool& out) { + std::string l; + l.reserve(s.size()); + for (char c : s) { + l += static_cast(std::tolower(static_cast(c))); + } + if (l == "true" || l == "1") { + out = true; + return true; + } + if (l == "false" || l == "0") { + out = false; + return true; + } + return false; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/format/input_format.h b/cpp/tools/format/input_format.h new file mode 100644 index 000000000..2838b0ff4 --- /dev/null +++ b/cpp/tools/format/input_format.h @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TSFILE_CLI_INPUT_FORMAT_H
+#define TSFILE_CLI_INPUT_FORMAT_H
+
+// NOTE(review): the two include targets below and the std::vector element
+// types in the declarations are missing from this listing - restore them
+// from the original commit.
+#include
+#include
+
+#include "common/db_common.h"
+#include "utils/db_utils.h"
+
+namespace tsfile_cli {
+
+// One column of a --columns specification ("name:TYPE:category").
+struct ColumnDef {
+    std::string name;
+    common::TSDataType type;
+    common::ColumnCategory category;
+};
+
+// Each parser returns true on success and writes its result to `out`.
+// parse_datatype_name accepts the type names BOOLEAN, INT32, INT64, FLOAT,
+// DOUBLE, STRING and TEXT; parse_category accepts "tag" and "field".
+bool parse_datatype_name(const std::string& s, common::TSDataType& out);
+bool parse_category(const std::string& s, common::ColumnCategory& out);
+// Parses a comma-separated list of name:TYPE:category items; on failure
+// `error` receives a human-readable reason.
+bool parse_columns_spec(const std::string& spec, std::vector& out,
+                        std::string& error);
+// Splits `line` on `delim`; with csv_quotes set, double-quoted sections may
+// contain the delimiter and "" stands for a literal quote.
+std::vector split_line(const std::string& line, char delim,
+                       bool csv_quotes);
+// Accepts "true"/"1" and "false"/"0" in any letter case.
+bool parse_bool_cell(const std::string& s, bool& out);
+
+}  // namespace tsfile_cli
+
+#endif  // TSFILE_CLI_INPUT_FORMAT_H
diff --git a/cpp/tools/format/output_format.cc b/cpp/tools/format/output_format.cc
new file mode 100644
index 000000000..32f0dbed1
--- /dev/null
+++ b/cpp/tools/format/output_format.cc
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "format/output_format.h" + +#include +#include +#include + +#include "utils/errno_define.h" + +namespace tsfile_cli { + +const char* error_code_message(int code) { + switch (code) { + case common::E_OOM: + return "out of memory"; + case common::E_NOT_EXIST: + return "not found"; + case common::E_INVALID_ARG: + return "invalid argument"; + case common::E_OUT_OF_RANGE: + return "value out of range"; + case common::E_OUT_OF_ORDER: + return "data is out of order"; + case common::E_FILE_OPEN_ERR: + return "cannot open file"; + case common::E_FILE_WRITE_ERR: + return "file write error"; + case common::E_FILE_READ_ERR: + return "file read error"; + case common::E_TSFILE_CORRUPTED: + return "file is corrupted"; + case common::E_INVALID_PATH: + return "invalid path"; + case common::E_DEVICE_NOT_EXIST: + return "device does not exist"; + case common::E_MEASUREMENT_NOT_EXIST: + return "measurement does not exist"; + case common::E_TABLE_NOT_EXIST: + return "table does not exist"; + case common::E_COLUMN_NOT_EXIST: + return "column does not exist"; + case common::E_INVALID_QUERY: + return "invalid query"; + case common::E_TYPE_NOT_SUPPORTED: + return "data type not supported"; + case common::E_TYPE_NOT_MATCH: + return "data type mismatch"; + case common::E_ENCODE_ERR: + return "failed to encode data"; + case common::E_DECODE_ERR: + return "failed to decode data"; + default: + return "internal error"; + } +} + +OutputFormat resolve_format(ParsedArgs::Format f, bool stdout_is_tty) { + switch (f) { + case ParsedArgs::Format::kCsv: + return 
OutputFormat::kCsv; + case ParsedArgs::Format::kTsv: + return OutputFormat::kTsv; + case ParsedArgs::Format::kJson: + return OutputFormat::kJson; + case ParsedArgs::Format::kTable: + return OutputFormat::kTable; + case ParsedArgs::Format::kAuto: + default: + return stdout_is_tty ? OutputFormat::kTable : OutputFormat::kTsv; + } +} + +const char* tsdatatype_name(common::TSDataType t) { + switch (t) { + case common::BOOLEAN: + return "BOOLEAN"; + case common::INT32: + return "INT32"; + case common::INT64: + return "INT64"; + case common::FLOAT: + return "FLOAT"; + case common::DOUBLE: + return "DOUBLE"; + case common::TEXT: + return "TEXT"; + case common::VECTOR: + return "VECTOR"; + case common::UNKNOWN: + return "UNKNOWN"; + case common::TIMESTAMP: + return "TIMESTAMP"; + case common::DATE: + return "DATE"; + case common::BLOB: + return "BLOB"; + case common::STRING: + return "STRING"; + case common::NULL_TYPE: + return "NULL"; + case common::INVALID_DATATYPE: + default: + return "INVALID"; + } +} + +const char* tsencoding_name(common::TSEncoding e) { + switch (e) { + case common::PLAIN: + return "PLAIN"; + case common::DICTIONARY: + return "DICTIONARY"; + case common::RLE: + return "RLE"; + case common::DIFF: + return "DIFF"; + case common::TS_2DIFF: + return "TS_2DIFF"; + case common::BITMAP: + return "BITMAP"; + case common::GORILLA_V1: + return "GORILLA_V1"; + case common::REGULAR: + return "REGULAR"; + case common::GORILLA: + return "GORILLA"; + case common::ZIGZAG: + return "ZIGZAG"; + case common::FREQ: + return "FREQ"; + case common::SPRINTZ: + return "SPRINTZ"; + case common::INVALID_ENCODING: + default: + return "UNKNOWN"; + } +} + +const char* compression_name(common::CompressionType c) { + switch (c) { + case common::UNCOMPRESSED: + return "UNCOMPRESSED"; + case common::SNAPPY: + return "SNAPPY"; + case common::GZIP: + return "GZIP"; + case common::LZO: + return "LZO"; + case common::SDT: + return "SDT"; + case common::PAA: + return "PAA"; + case 
common::PLA: + return "PLA"; + case common::LZ4: + return "LZ4"; + case common::INVALID_COMPRESSION: + default: + return "UNKNOWN"; + } +} + +std::string csv_escape(const std::string& field) { + bool needs_quote = field.find_first_of(",\"\n\r") != std::string::npos; + if (!needs_quote) { + return field; + } + std::string out = "\""; + for (char c : field) { + if (c == '"') { + out += "\"\""; + } else { + out += c; + } + } + out += "\""; + return out; +} + +std::string json_escape(const std::string& s) { + std::string out; + out.reserve(s.size() + 2); + for (unsigned char c : s) { + switch (c) { + case '"': + out += "\\\""; + break; + case '\\': + out += "\\\\"; + break; + case '\b': + out += "\\b"; + break; + case '\f': + out += "\\f"; + break; + case '\n': + out += "\\n"; + break; + case '\r': + out += "\\r"; + break; + case '\t': + out += "\\t"; + break; + default: + if (c < 0x20) { + char buf[8]; + std::snprintf(buf, sizeof(buf), "\\u%04x", c); + out += buf; + } else { + out += static_cast(c); + } + } + } + return out; +} + +RowWriter::RowWriter(std::ostream& out, OutputFormat fmt, + std::vector header, + std::vector types, bool no_header) + : out_(out), + fmt_(fmt), + header_(std::move(header)), + types_(std::move(types)), + no_header_(no_header) {} + +bool RowWriter::emits_json_bare(size_t col) const { + if (col >= types_.size()) { + return false; + } + switch (types_[col]) { + case common::BOOLEAN: + case common::INT32: + case common::INT64: + case common::FLOAT: + case common::DOUBLE: + case common::TIMESTAMP: + return true; + default: + return false; + } +} + +void RowWriter::ensure_header() { + if (header_done_) { + return; + } + header_done_ = true; + if (no_header_) { + return; + } + const char sep = (fmt_ == OutputFormat::kCsv) ? ',' : '\t'; + for (size_t i = 0; i < header_.size(); ++i) { + if (i) { + out_ << sep; + } + out_ << (fmt_ == OutputFormat::kCsv ? 
csv_escape(header_[i]) + : header_[i]); + } + out_ << "\n"; +} + +void RowWriter::write(const std::vector& cells, + const std::vector& is_null) { + if (fmt_ == OutputFormat::kTable) { + rows_.push_back(cells); + rows_null_.push_back(is_null); + return; + } + if (fmt_ == OutputFormat::kJson) { + out_ << "{"; + for (size_t i = 0; i < header_.size(); ++i) { + if (i) { + out_ << ","; + } + out_ << "\"" << json_escape(header_[i]) << "\":"; + if (i < is_null.size() && is_null[i]) { + out_ << "null"; + } else if (emits_json_bare(i)) { + out_ << (i < cells.size() ? cells[i] : "null"); + } else { + out_ << "\"" << json_escape(i < cells.size() ? cells[i] : "") + << "\""; + } + } + out_ << "}\n"; + return; + } + + ensure_header(); + const char sep = (fmt_ == OutputFormat::kCsv) ? ',' : '\t'; + for (size_t i = 0; i < cells.size(); ++i) { + if (i) { + out_ << sep; + } + bool null_cell = i < is_null.size() && is_null[i]; + if (null_cell) { + continue; + } + out_ << (fmt_ == OutputFormat::kCsv ? csv_escape(cells[i]) : cells[i]); + } + out_ << "\n"; +} + +void RowWriter::finish() { + if (fmt_ != OutputFormat::kTable) { + if (fmt_ == OutputFormat::kCsv || fmt_ == OutputFormat::kTsv) { + ensure_header(); + } + return; + } + + const size_t ncols = header_.size(); + std::vector width(ncols, 0); + if (!no_header_) { + for (size_t i = 0; i < ncols; ++i) { + width[i] = header_[i].size(); + } + } + for (const auto& row : rows_) { + for (size_t i = 0; i < ncols && i < row.size(); ++i) { + width[i] = std::max(width[i], row[i].size()); + } + } + + auto emit = [&](const std::vector& cells, + const std::vector& nulls) { + for (size_t i = 0; i < ncols; ++i) { + std::string cell = + (i < cells.size() && !(i < nulls.size() && nulls[i])) ? 
cells[i] + : ""; + out_ << cell; + if (i + 1 < ncols) { + out_ << std::string(width[i] - cell.size() + 2, ' '); + } + } + out_ << "\n"; + }; + + if (!no_header_) { + std::vector no_nulls(ncols, false); + emit(header_, no_nulls); + } + for (size_t r = 0; r < rows_.size(); ++r) { + emit(rows_[r], rows_null_[r]); + } +} + +} // namespace tsfile_cli diff --git a/cpp/tools/format/output_format.h b/cpp/tools/format/output_format.h new file mode 100644 index 000000000..c7efdd190 --- /dev/null +++ b/cpp/tools/format/output_format.h @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TSFILE_CLI_OUTPUT_FORMAT_H +#define TSFILE_CLI_OUTPUT_FORMAT_H + +#include +#include +#include + +#include "cli/cli_args.h" +#include "common/db_common.h" + +namespace tsfile_cli { + +enum class OutputFormat { kCsv, kTsv, kJson, kTable }; + +OutputFormat resolve_format(ParsedArgs::Format f, bool stdout_is_tty); + +// Translate a storage-engine error code (common::E_*) into a human-readable +// phrase so CLI diagnostics carry meaning instead of a bare numeric code. 
+const char* error_code_message(int code);
+
+// Display names for the storage enums.
+const char* tsdatatype_name(common::TSDataType t);
+const char* tsencoding_name(common::TSEncoding e);
+const char* compression_name(common::CompressionType c);
+
+// csv_escape quotes a field when it contains a comma, quote or line break;
+// json_escape escapes text for use inside a JSON string literal.
+std::string csv_escape(const std::string& field);
+std::string json_escape(const std::string& s);
+
+// Writes rows of text cells to a stream in one of the OutputFormat layouts.
+// CSV, TSV and JSON rows are written as soon as write() is called; table
+// rows are buffered and printed, column-aligned, by finish(). Call finish()
+// once after the last row.
+// NOTE(review): the element types of the std::vector parameters and members
+// below are missing from this listing - restore them from the original
+// commit.
+class RowWriter {
+   public:
+    RowWriter(std::ostream& out, OutputFormat fmt,
+              std::vector header,
+              std::vector types, bool no_header);
+
+    // `is_null[i]` set means cell i has no value.
+    void write(const std::vector& cells,
+               const std::vector& is_null);
+    void finish();
+
+   private:
+    void ensure_header();
+    bool emits_json_bare(size_t col) const;
+
+    std::ostream& out_;
+    OutputFormat fmt_;
+    std::vector header_;
+    std::vector types_;
+    bool no_header_;
+    bool header_done_ = false;  // set once the CSV/TSV header was handled
+    std::vector> rows_;       // buffered rows (table format only)
+    std::vector> rows_null_;  // null flags matching rows_
+};
+
+}  // namespace tsfile_cli
+
+#endif  // TSFILE_CLI_OUTPUT_FORMAT_H
diff --git a/cpp/tools/format/result_set_format.cc b/cpp/tools/format/result_set_format.cc
new file mode 100644
index 000000000..104ec5ee2
--- /dev/null
+++ b/cpp/tools/format/result_set_format.cc
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +#include "format/result_set_format.h" + +#include +#include +#include +#include +#include + +#include "utils/errno_define.h" + +namespace tsfile_cli { + +std::string cell_to_string(storage::ResultSet* rs, uint32_t i, + common::TSDataType type) { + std::ostringstream ss; + switch (type) { + case common::BOOLEAN: + return rs->get_value(i) ? "true" : "false"; + case common::INT32: + ss << rs->get_value(i); + return ss.str(); + case common::INT64: + case common::TIMESTAMP: + ss << rs->get_value(i); + return ss.str(); + case common::FLOAT: + ss << rs->get_value(i); + return ss.str(); + case common::DOUBLE: + ss << rs->get_value(i); + return ss.str(); + case common::DATE: { + std::tm d = rs->get_value(i); + char buf[16]; + std::snprintf(buf, sizeof(buf), "%04d-%02d-%02d", d.tm_year + 1900, + d.tm_mon + 1, d.tm_mday); + return buf; + } + case common::TEXT: + case common::STRING: + case common::BLOB: { + common::String* s = rs->get_value(i); + return s == nullptr ? std::string() : s->to_std_string(); + } + default: + return ""; + } +} + +int emit_result_set(storage::ResultSet* rs, OutputFormat fmt, bool no_header, + std::ostream& out, long long offset, long long limit) { + auto meta = rs->get_metadata(); + const uint32_t ncol = meta->get_column_count(); + std::vector header; + std::vector types; + header.reserve(ncol); + types.reserve(ncol); + for (uint32_t i = 1; i <= ncol; ++i) { + header.push_back(meta->get_column_name(i)); + types.push_back(meta->get_column_type(i)); + } + + RowWriter writer(out, fmt, header, types, no_header); + bool has_next = false; + int code = common::E_OK; + long long skipped = 0; + long long emitted = 0; + while ((code = rs->next(has_next)) == common::E_OK && has_next) { + if (skipped < offset) { + ++skipped; + continue; + } + if (limit >= 0 && emitted >= limit) { + break; + } + std::vector cells(ncol); + std::vector nulls(ncol, false); + for (uint32_t i = 1; i <= ncol; ++i) { + if (rs->is_null(i)) { + nulls[i - 1] = true; + } else { + 
cells[i - 1] = cell_to_string(rs, i, types[i - 1]); + } + } + writer.write(cells, nulls); + ++emitted; + } + writer.finish(); + return code; +} + +namespace { + +struct BufferedRow { + std::vector cells; + std::vector nulls; +}; + +BufferedRow read_current_row(storage::ResultSet* rs, + const std::vector& types) { + BufferedRow row; + const uint32_t ncol = static_cast(types.size()); + row.cells.assign(ncol, ""); + row.nulls.assign(ncol, false); + for (uint32_t i = 1; i <= ncol; ++i) { + if (rs->is_null(i)) { + row.nulls[i - 1] = true; + } else { + row.cells[i - 1] = cell_to_string(rs, i, types[i - 1]); + } + } + return row; +} + +} // namespace + +int emit_result_set_sampled(storage::ResultSet* rs, OutputFormat fmt, + bool no_header, std::ostream& out, long long limit, + unsigned long long seed) { + auto meta = rs->get_metadata(); + const uint32_t ncol = meta->get_column_count(); + std::vector header; + std::vector types; + header.reserve(ncol); + types.reserve(ncol); + for (uint32_t i = 1; i <= ncol; ++i) { + header.push_back(meta->get_column_name(i)); + types.push_back(meta->get_column_type(i)); + } + + std::vector reservoir; + reservoir.reserve(static_cast(limit)); + std::mt19937_64 rng(seed); + bool has_next = false; + int code = common::E_OK; + long long seen = 0; + while ((code = rs->next(has_next)) == common::E_OK && has_next) { + BufferedRow row = read_current_row(rs, types); + if (static_cast(reservoir.size()) < limit) { + reservoir.push_back(row); + } else { + std::uniform_int_distribution dist(0, seen); + long long idx = dist(rng); + if (idx < limit) { + reservoir[static_cast(idx)] = row; + } + } + ++seen; + } + + RowWriter writer(out, fmt, header, types, no_header); + for (const BufferedRow& row : reservoir) { + writer.write(row.cells, row.nulls); + } + writer.finish(); + return code; +} + +} // namespace tsfile_cli diff --git a/cpp/tools/format/result_set_format.h b/cpp/tools/format/result_set_format.h new file mode 100644 index 000000000..a9fb2a4b1 --- 
/dev/null +++ b/cpp/tools/format/result_set_format.h @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TSFILE_CLI_RESULT_SET_FORMAT_H +#define TSFILE_CLI_RESULT_SET_FORMAT_H + +#include +#include + +#include "common/db_common.h" +#include "format/output_format.h" +#include "reader/result_set.h" + +namespace tsfile_cli { + +std::string cell_to_string(storage::ResultSet* rs, uint32_t col_index, + common::TSDataType type); + +int emit_result_set(storage::ResultSet* rs, OutputFormat fmt, bool no_header, + std::ostream& out, long long offset = 0, + long long limit = -1); + +int emit_result_set_sampled(storage::ResultSet* rs, OutputFormat fmt, + bool no_header, std::ostream& out, long long limit, + unsigned long long seed); + +} // namespace tsfile_cli + +#endif // TSFILE_CLI_RESULT_SET_FORMAT_H diff --git a/cpp/tools/skills/tsfile-cli/SKILL.md b/cpp/tools/skills/tsfile-cli/SKILL.md new file mode 100644 index 000000000..080674d07 --- /dev/null +++ b/cpp/tools/skills/tsfile-cli/SKILL.md @@ -0,0 +1,110 @@ + + +--- +name: tsfile-cli +description: Use when you need to inspect, preview, export, OR import an Apache TsFile (.tsfile) from the command line — list devices/tables, dump schema, read 
file/series metadata, count rows, sample/preview rows, or write CSV/TSV into a new .tsfile — via the project's C++ `tsfile-cli` in cpp/tools. +--- + +# tsfile-cli + +Single pipe-friendly C++ binary to inspect **and** import `.tsfile` (TsFile's analogue of +`parquet-cli`/`pqrs`). Source `cpp/tools/`. Read data → stdout, diagnostics → stderr; +`write` imports CSV/TSV → a new file. + +## Binary + +- Name `tsfile-cli` (CMake target `tsfile_cli`). Find: `ls cpp/build/*/bin/tsfile-cli`. +- Build only if missing: `cd cpp && bash build.sh -t=Debug`. +- CMake ≥4 aborts on bundled ANTLR4 (`Policy CMP00xx ... OLD`) → add `--disable-antlr4` + (reader/CLI don't use ANTLR4). + +## Read + +`tsfile-cli <cmd> [opts] <file>` · `tsfile-cli --help | --version | help` + +| cmd | output | scans pages | +|---|---|---| +| `ls` | device (tree) / table (table) per line | no | +| `schema` | `target,measurement,datatype,encoding,compression` | no | +| `meta` | model, device/table/series counts, time range, size | no | +| `stats` | per-series `count,start,end,min,max,first,last,sum` | no | +| `count` | per-series counts + `total` row | no | +| `head` | first N rows (default 10, `-n`) | yes | +| `cat` | all matching rows (streamed) | yes | +| `sample` | reservoir sample (default 10, `-n` + `--seed`) | yes | + +Prefer no-scan verbs (`ls/schema/meta/stats/count`) — cheap and never hit the page-decode caveat. 
+ +``` +opts: -f csv|tsv|json|table (default TTY→table, pipe→tsv) + -d <device> | -t <table> (mutually exclusive) + -m a,b,c (projection) · -n N · --offset N · --start <ms> · --end <ms> (inclusive) + --seed N · --no-header · --model tree|table (else auto) +applies: -m → schema/head/cat/sample · -d/-t → row cmds/schema/stats/count · --offset ∉ sample +json=NDJSON (num/bool bare, else quoted, null→null) · csv=RFC4180 · ts=raw epoch ms +exit: 0 ok · 1 usage · 2 file open/corrupt · 3 query/runtime +``` + +```sh +B=cpp/build/Debug/bin/tsfile-cli +$B meta data.tsfile; $B count -t table1 -f tsv data.tsfile +$B cat -m temp --start 1700000000000 -f csv data.tsfile 2>/dev/null | head +``` + +## Write + +`tsfile-cli write --table <name> --columns <spec> -o <out> [-f csv|tsv] [--no-header] [--header-match] [-v] [<file> | -]` + +Imports rows into a **new table-model** file (overwritten). Input col 0 = timestamp +(epoch ms, int); remaining cols declared by `--columns` — **no type inference**. + +``` +spec := col (',' col)* +col := name ':' TYPE ':' ('tag' | 'field') # TYPE case-insensitive +TYPE ∈ { BOOLEAN, INT32, INT64, FLOAT, DOUBLE, STRING, TEXT } +input := file | '-' | omitted # '-' or omitted = stdin +``` + +- `-o` required (overwritten, must differ from input); `-f` default csv (json/table → usage error). +- header: first line skipped by default · `--no-header` if none · `--header-match` validates + header names vs `--columns` (mutually exclusive with `--no-header`). +- empty cell = null · `--table` is lower-cased · success **silent**, `-v` → `wrote N rows to <out>` on stderr. +- **timestamps must be strictly increasing per device** (device = tag-column values); rows for + different tags may interleave/reuse timestamps. Out-of-order input → error with line number. +- a failed import deletes its partial output (no half-written `.tsfile` left behind). 
+- exit: `1` usage (missing `--table`/`--columns`/`-o`, bad spec, dup column, read-only flag) · `2` IO open · `3` row (field-count / type / overflow / timestamp-order / header mismatch). + +```sh +printf 'time,id1,s1\n0,dev,0\n1,dev,10\n' \ + | tsfile-cli write --table t1 --columns "id1:STRING:tag,s1:INT64:field" -o out.tsfile - +tsfile-cli count -f tsv out.tsfile # -> t1.dev s1 2 +``` + +Tree-model / JSON / programmatic writes → C++ SDK `cpp/examples/cpp_examples/demo_write.cpp` +(`TsFileTableWriter`/`TsFileWriter` + `Tablet`); Java/Python writers under `java/`, `python/`. + +## Caveats + +- `head`/`cat`/`sample` decode pages → may abort (`decode_cur_time_page_data`, exit 134) on + some aligned files incl. bundled `cpp/examples/test_cpp.tsfile`. Storage-engine/file issue, + not a CLI bug; metadata verbs still work. Use a well-formed (e.g. self-written) file for rows. +- table-model `target` is derived from tag bytes → may show non-printable chars in `stats/count/schema`. +- `schema` lists all columns; `meta/stats/count` count only field series → `series_count` can be + fewer than `schema` rows (not a bug). diff --git a/cpp/tools/tools_main.cc b/cpp/tools/tools_main.cc new file mode 100644 index 000000000..97c815f47 --- /dev/null +++ b/cpp/tools/tools_main.cc @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include "cli/run_cli.h" + +int main(int argc, char** argv) { + std::vector args(argv + 1, argv + argc); + return tsfile_cli::run_cli(args, std::cout, std::cerr); +}