diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake index 207c92a2607364..9bb7b8ba748769 100644 --- a/be/cmake/thirdparty.cmake +++ b/be/cmake/thirdparty.cmake @@ -68,6 +68,7 @@ add_thirdparty(curl) add_thirdparty(lz4) add_thirdparty(thrift) add_thirdparty(thriftnb) +add_thirdparty(crc32c) add_thirdparty(libevent_core LIBNAME "lib/libevent_core.a") add_thirdparty(libevent_openssl LIBNAME "lib/libevent_openssl.a") diff --git a/be/src/exprs/block_bloom_filter.hpp b/be/src/exprs/block_bloom_filter.hpp index 1d00da3b648267..6736d1e18d4b5a 100644 --- a/be/src/exprs/block_bloom_filter.hpp +++ b/be/src/exprs/block_bloom_filter.hpp @@ -76,7 +76,7 @@ class BlockBloomFilter { // Same as above with convenience of hashing the key. void insert(const StringRef& key) noexcept { if (key.data) { - insert(HashUtil::crc_hash(key.data, uint32_t(key.size), _hash_seed)); + insert(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed)); } } @@ -105,7 +105,7 @@ class BlockBloomFilter { // Same as above with convenience of hashing the key. bool find(const StringRef& key) const noexcept { if (key.data) { - return find(HashUtil::crc_hash(key.data, uint32_t(key.size), _hash_seed)); + return find(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed)); } return false; } diff --git a/be/src/util/crc32c.cpp b/be/src/util/crc32c.cpp index 7ad31e7f22657f..3a2b91ddca9d73 100644 --- a/be/src/util/crc32c.cpp +++ b/be/src/util/crc32c.cpp @@ -15,259 +15,27 @@ // specific language governing permissions and limitations // under the License. -// the following code are modified from RocksDB: -// https://github.com/facebook/rocksdb/blob/master/util/crc32c.cc +#include "util/crc32c.h" -// IWYU pragma: no_include -#include -#include - -#include "util/coding.h" +#include namespace doris { namespace crc32c { -static const uint32_t table0_[256] = { - 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, - 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, - 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, - 0x25afd373, 0x36ff2087, 0xc494a384, 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, - 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, - 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512, - 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, 0x30e349b1, - 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, - 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, - 0x6ef07595, 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, - 0x67dafa54, 0x95b17957, 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, - 0xfe53516f, 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, - 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, - 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, 0x61c69362, 0x93ad1061, - 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, 0xeb1fcbad, - 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, - 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, - 0xa55230e6, 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, - 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, - 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, - 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, 0x92a8fc17, 0x60c37f14, 0x73938ce0, - 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, - 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, - 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, - 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, - 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, - 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, - 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, - 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, - 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, - 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, 0x590ab964, - 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, - 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, - 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, - 0x4f48173d, 0xbd23943e, 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, - 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, - 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351}; -static const uint32_t table1_[256] = { - 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, - 0x7a6dc945, 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, 0xd39ea264, 0xc03c3a13, - 0xf4db928a, 0xe7790afd, 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, 0x714f905d, - 0x62ed082a, 0x560aa0b3, 0x45a838c4, 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, - 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, 0x7f8be302, 0x6c297b75, 0x58ced3ec, - 0x4b6c4b9b, 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, 0xe29f20ba, 0xf13db8cd, - 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, 0x404e1283, - 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, - 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, 0x93d0b0e7, 0x80722890, 0xb4958009, - 0xa737187e, 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, - 0x96d89736, 0x857a0f41, 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, - 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, - 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, - 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, 0x809c2506, 0x933ebd71, - 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, 0x1d88e6be, - 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, - 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, - 0xc5341dc2, 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, - 0x4b82460d, 0x5820de7a, 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, 0xb5499b25, - 0xa6eb0352, 0x920cabcb, 0x81ae33bc, 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, - 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, 0xc4060b78, 0xd7a4930f, 0xe3433b96, - 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, 0x5912c8c0, 0x4ab050b7, - 0x7e57f82e, 0x6df56059, 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, 0x844819fb, - 0x97ea818c, 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, - 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, 0x70938b71, - 0x63311306, 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, 0xf50789a6, 0xe6a511d1, - 0xd242b948, 0xc1e0213f, 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, 0x68134a1e, - 0x7bb1d269, 0x4f567af0, 0x5cf4e287, 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, - 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, 0x99c0ff45, 0x8a626732, 0xbe85cfab, - 0xad2757dc, 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, 0x3b11cd7c, 0x28b3550b, - 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, 0xa6050ec4, - 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, - 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, 0x35d5be23, 0x26772654, 0x12908ecd, - 0x013216ba, 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, - 0x8f844d75, 0x9c26d502, 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, - 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, - 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483}; -static const uint32_t table2_[256] = { - 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, 0xd1b1f617, - 0x74f06469, 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, 0xa68f9adf, 0x03ce08a1, - 0xe9e0c8d2, 0x4ca15aac, 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, - 0x4b3d4bee, 0xa1138b9d, 0x045219e3, 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, - 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, 0xe144fb14, 0x4405696a, 0xae2ba919, - 0x0b6a3b67, 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, 0xd915c5d1, 0x7c5457af, - 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, 0x91e6869e, - 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, - 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, 0x37691c41, 0x92288e3f, 0x78064e4c, - 0xdd47dc32, 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, - 0x16d476ce, 0xb395e4b0, 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, - 0xc4ab8878, 0x2e85480b, 0x8bc4da75, 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, - 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, 0x8f96c396, 0x2ad751e8, 0xc0f9919b, - 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, 0x26217bcd, 0x8360e9b3, - 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, 0x1e704508, - 0xbb31d776, 0x511f1705, 0xf45e857b, 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, - 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, - 0x2273622e, 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, - 0xbf63ce95, 0x1a225ceb, 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, 0x15f9d359, - 0xb0b84127, 0x5a968154, 0xffd7132a, 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, - 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, - 0x11abcaba, 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, 0xc3d4340c, 0x6695a672, - 0x8cbb6601, 0x29faf47f, 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, 0x6a638c57, - 0xcf221e29, 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, - 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, 0x83834485, - 0x26c2d6fb, 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, 0x841f55c7, 0x215ec7b9, - 0xcb7007ca, 0x6e3195b4, 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, - 0x190ff97c, 0xf321390f, 0x5660ab71, 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, - 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, 0x7413c95f, 0xd1525b21, 0x3b7c9b52, - 0x9e3d092c, 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, 0x3ce08a10, 0x99a1186e, - 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, 0x04b1b4d5, - 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, - 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, 0x33d8a894, 0x96993aea, 0x7cb7fa99, - 0xd9f668e7, 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, - 0x44e6c45c, 0xe1a75622, 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, - 0xe63b4760, 0x0c158713, 0xa954156d, 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, - 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8}; -static const uint32_t table3_[256] = { - 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, 0xc4451272, - 0x1900b8ca, 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, 0x8d665215, 0x5023f8ad, - 0x32017194, 0xef44db2c, 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, 0x924680ce, - 0x4f032a76, 0x2d21a34f, 0xf06409f7, 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, - 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, 0xd725148b, 0x0a60be33, 0x6842370a, - 0xb5079db2, 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, 0x2161776d, 0xfc24ddd5, - 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, 0x3e41a5b6, - 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, - 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, 0xb327f7a3, 0x6e625d1b, 0x0c40d422, - 0xd1057e9a, 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, - 0x6fe34d95, 0xb2a6e72d, 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, - 0xfb85a74a, 0x99a72e73, 0x44e284cb, 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, - 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, 0xb4868d3c, 0x69c32784, 0x0be1aebd, - 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, 0x7c834b6c, 0xa1c6e1d4, - 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, 0x8ac7288a, - 0x57828232, 0x35a00b0b, 0xe8e5a1b3, 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, - 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, - 0x8ce7429b, 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, - 0xa7e68bc5, 0x7aa3217d, 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, 0x2982f8cc, - 0xf4c75274, 0x96e5db4d, 0x4ba071f5, 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, - 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, 0xbbc47802, 0x6681d2ba, 0x04a35b83, - 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, 0x4d801be4, 0x90c5b15c, - 0xf2e73865, 0x2fa292dd, 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, 0x8585ddb4, - 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, - 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, 0xb784ac20, - 0x6ac10698, 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, 0x17c35d7a, 0xca86f7c2, - 0xa8a47efb, 0x75e1d443, 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, 0xe1873e9c, - 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, - 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, 0x0f42f53e, 0xd2075f86, 0xb025d6bf, - 0x6d607c07, 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, 0x106227e5, 0xcd278d5d, - 0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, 0xe6264403, - 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, - 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, 0x5501b3a0, 0x88441918, 0xea669021, - 0x37233a99, 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, - 0x1c22f3c7, 0xc167597f, 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, - 0x6120a825, 0x0302211c, 0xde478ba4, 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, - 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842}; - -// Used to fetch a naturally-aligned 32-bit word in little endian byte-order -static inline uint32_t LE_LOAD32(const uint8_t* p) { - return decode_fixed32_le(p); -} - -#if defined(__SSE4_2__) && (defined(__LP64__) || defined(_WIN64)) -static inline uint64_t LE_LOAD64(const uint8_t* p) { - return decode_fixed64_le(p); +uint32_t Extend(uint32_t crc, const char* data, size_t n) { + return crc32c_extend(crc, (const uint8_t*)data, n); } -#endif -[[maybe_unused]] static inline void Slow_CRC32(uint64_t* l, uint8_t const** p) { - uint32_t c = static_cast(*l ^ LE_LOAD32(*p)); - *p += 4; - *l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^ table1_[(c >> 16) & 0xff] ^ - table0_[c >> 24]; - // DO it twice. - c = static_cast(*l ^ LE_LOAD32(*p)); - *p += 4; - *l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^ table1_[(c >> 16) & 0xff] ^ - table0_[c >> 24]; +uint32_t Value(const char* data, size_t n) { + return crc32c_value((const uint8_t*)data, n); } -static inline void Fast_CRC32(uint64_t* l, uint8_t const** p) { -#if defined(__SSE4_2__) || defined(__aarch64__) -#if (defined(__LP64__) || defined(_WIN64)) && !defined(__aarch64__) - *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); - *p += 8; -#else - *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); - *p += 4; - *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); - *p += 4; -#endif -#else - Slow_CRC32(l, p); -#endif -} - -template -uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) { - const uint8_t* p = reinterpret_cast(buf); - const uint8_t* e = p + size; - uint64_t l = crc ^ 0xffffffffu; - -// Align n to (1 << m) byte boundary -#define CRC_ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1)) - -#define STEP1 \ - do { \ - int c = (l & 0xff) ^ *p++; \ - l = table0_[c] ^ (l >> 8); \ - } while (0) - - // Point x at first 16-byte aligned byte in string. This might be - // just past the end of the string. - const uintptr_t pval = reinterpret_cast(p); - const uint8_t* x = reinterpret_cast(CRC_ALIGN(pval, 4)); - if (x <= e) { - // Process bytes until finished or p is 16-byte aligned - while (p != x) { - STEP1; - } - } - // Process bytes 16 at a time - while ((e - p) >= 16) { - CRC32(&l, &p); - CRC32(&l, &p); - } - // Process bytes 8 at a time - while ((e - p) >= 8) { - CRC32(&l, &p); +uint32_t Value(const std::vector& slices) { + uint32_t crc = 0; + for (const auto& slice : slices) { + crc = crc32c_extend(crc, (const uint8_t*)slice.get_data(), slice.get_size()); } - // Process the last few bytes - while (p != e) { - STEP1; - } -#undef STEP1 -#undef CRC_ALIGN - return static_cast(l ^ 0xffffffffu); -} - -uint32_t Extend(uint32_t crc, const char* buf, size_t size) { -#if defined(__SSE4_2__) || defined(__aarch64__) - return ExtendImpl(crc, buf, size); -#else - return ExtendImpl(crc, buf, size); -#endif + return crc; } } // namespace crc32c diff --git a/be/src/util/crc32c.h b/be/src/util/crc32c.h index 0e12fe3961d264..75ac6a59128f9d 100644 --- a/be/src/util/crc32c.h +++ b/be/src/util/crc32c.h @@ -15,14 +15,8 @@ // specific language governing permissions and limitations // under the License. -// the following code are modified from RocksDB: -// https://github.com/facebook/rocksdb/blob/master/util/crc32c.h - #pragma once -#include -#include - #include #include "util/slice.h" @@ -30,24 +24,13 @@ namespace doris { namespace crc32c { -// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the -// crc32c of some string A. Extend() is often used to maintain the -// crc32c of a stream of data. -extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); +uint32_t Extend(uint32_t crc, const char* data, size_t n); // Return the crc32c of data[0,n-1] -inline uint32_t Value(const char* data, size_t n) { - return Extend(0, data, n); -} +uint32_t Value(const char* data, size_t n); // Return the crc32c of data content in all slices -inline uint32_t Value(const std::vector& slices) { - uint32_t crc = 0; - for (auto& slice : slices) { - crc = Extend(crc, slice.get_data(), slice.get_size()); - } - return crc; -} +uint32_t Value(const std::vector& slices); } // namespace crc32c } // namespace doris diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index 32fd1ceac3ae4e..11892fa4321496 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -30,6 +30,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "util/cpu_info.h" +#include "util/crc32c.h" #include "util/hash/city.h" #include "util/murmur_hash3.h" #include "util/sse_util.hpp" @@ -49,7 +50,12 @@ class HashUtil { return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4); } -#if defined(__SSE4_2__) || defined(__aarch64__) + // ATTN: crc32c's result is different with zlib_crc32 coz of different polynomial + // crc32c have better performance than zlib_crc32/crc_hash + static uint32_t crc32c_hash(const void* data, uint32_t bytes, uint32_t hash) { + return crc32c::Extend(hash, static_cast(data), bytes); + } + // Compute the Crc32 hash for data using SSE4 instructions. The input hash parameter is // the current hash/seed value. // This should only be called if SSE is supported. @@ -59,6 +65,8 @@ class HashUtil { // NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn. // TODO: crc32 hashes with different seeds do not result in different hash functions. // The resulting hashes are correlated. + // ATTN: prefer do not use this function anymore, use crc32c_hash instead + // This function is retained because it is not certain whether there are compatibility issues with historical data. static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) { if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) { return zlib_crc_hash(data, bytes, hash); @@ -117,11 +125,6 @@ class HashUtil { return converter.u64; } -#else - static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) { - return zlib_crc_hash(data, bytes, hash); - } -#endif // refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java static const uint32_t MURMUR3_32_SEED = 104729; diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index a2ff3e781412ad..63dc41bfd51036 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -345,7 +345,7 @@ class ColumnDictI32 final : public COWHelper { if (type == FieldType::OLAP_FIELD_TYPE_CHAR) { len = strnlen(sv.data, sv.size); } - uint32_t hash_val = HashUtil::crc_hash(sv.data, static_cast(len), 0); + uint32_t hash_val = HashUtil::crc32c_hash(sv.data, static_cast(len), 0); _hash_values[code] = hash_val; _compute_hash_value_flags[code] = 1; return _hash_values[code]; diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 343d0d28d45e81..d2549ca07e288c 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -4598,7 +4598,7 @@ class FunctionNgramSearch : public IFunction { uint32_t sub_str_hash(const char* data, int32_t length) const { constexpr static uint32_t seed = 0; - return HashUtil::crc_hash(data, length, seed); + return HashUtil::crc32c_hash(data, length, seed); } template