|
20 | 20 | #include <gen_cpp/parquet_types.h> |
21 | 21 |
|
22 | 22 | #include "common/cast_set.h" |
| 23 | +#include "runtime/primitive_type.h" |
23 | 24 | #include "vec/columns/column_varbinary.h" |
24 | 25 | #include "vec/core/extended_types.h" |
25 | 26 | #include "vec/core/field.h" |
@@ -354,6 +355,86 @@ class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { |
354 | 355 | } |
355 | 356 | }; |
356 | 357 |
|
| 358 | +class Float16PhysicalConverter : public PhysicalToLogicalConverter { |
| 359 | +private: |
| 360 | + int _type_length; |
| 361 | + |
| 362 | +public: |
| 363 | + Float16PhysicalConverter(int type_length) : _type_length(type_length) { |
| 364 | + DCHECK_EQ(_type_length, 2); |
| 365 | + } |
| 366 | + |
| 367 | + Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
| 368 | + ColumnPtr from_col = remove_nullable(src_physical_col); |
| 369 | + MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
| 370 | + |
| 371 | + const auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
| 372 | + size_t length = src_data->size(); |
| 373 | + size_t num_values = length / _type_length; |
| 374 | + auto* to_float_column = assert_cast<ColumnFloat32*>(to_col.get()); |
| 375 | + const uint8_t* ptr = src_data->get_data().data(); |
| 376 | + for (int i = 0; i < num_values; ++i) { |
| 377 | + size_t offset = i * _type_length; |
| 378 | + const uint8_t* data_ptr = ptr + offset; |
| 379 | + uint16_t raw; |
| 380 | + memcpy(&raw, data_ptr, sizeof(uint16_t)); |
| 381 | + float value = half_to_float(raw); |
| 382 | + to_float_column->insert_value(value); |
| 383 | + } |
| 384 | + |
| 385 | + return Status::OK(); |
| 386 | + } |
| 387 | + |
| 388 | + float half_to_float(uint16_t h) { |
| 389 | + // uint16_t h: half precision floating point |
| 390 | + // bit 15: sign(1 bit) |
| 391 | + // bits 14..10 : exponent(5 bits) |
| 392 | + // bits 9..0 : mantissa(10 bits) |
| 393 | + |
| 394 | + // sign bit placed to float32 bit31 |
| 395 | + uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000 |
| 396 | + // exponent:(5 bits) |
| 397 | + uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask) |
| 398 | + // mantissa(10 bits) |
| 399 | + uint32_t mant = (h & 0x03FFU); // 10-bit fraction |
| 400 | + |
| 401 | + // cases:Zero/Subnormal, Normal, Inf/NaN |
| 402 | + if (exp == 0) { |
| 403 | + // exp==0: Zero or Subnormal ---------- |
| 404 | + if (mant == 0) { |
| 405 | + // ±0.0 |
| 406 | + // sign = either 0x00000000 or 0x80000000 |
| 407 | + return std::bit_cast<float>(sign); |
| 408 | + } else { |
| 409 | + // ---------- Subnormal ---------- |
| 410 | + // half subnormal: |
| 411 | + // value = (-1)^sign * (mant / 2^10) * 2^(1 - bias) |
| 412 | + // half bias = 15 → exponent = 1 - 15 = -14 |
| 413 | + float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F); |
| 414 | + return sign ? -f : f; |
| 415 | + } |
| 416 | + } else if (exp == 0x1F) { |
| 417 | + // exp==31: Inf or NaN ---------- |
| 418 | + // float32: |
| 419 | + // exponent = 255 (0xFF) |
| 420 | + // mantissa = mant << 13 |
| 421 | + uint32_t f = sign | 0x7F800000U | (mant << 13); |
| 422 | + return std::bit_cast<float>(f); |
| 423 | + } else { |
| 424 | + // Normalized ---------- |
| 425 | + // float32 exponent: |
| 426 | + // exp32 = exp16 - bias16 + bias32 |
| 427 | + // bias16 = 15 |
| 428 | + // bias32 = 127 |
| 429 | + // |
| 430 | + // so: exp32 = exp + (127 - 15) |
| 431 | + uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent |
| 432 | + | (mant << 13); // mantissa align to 23 bits |
| 433 | + return std::bit_cast<float>(f); |
| 434 | + } |
| 435 | + } |
| 436 | +}; |
| 437 | + |
357 | 438 | class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { |
358 | 439 | public: |
359 | 440 | UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {} |
|
0 commit comments