@@ -23,165 +23,168 @@ class ConvInteger : public OpKernel {

  private:
   template <typename XT, typename WT>
-  Status ComputeInner(OpKernelContext* context) const {
-    const auto input_defs = Node().InputDefs();
-    size_t num_inputs = input_defs.size();
-    const auto* X = context->Input<Tensor>(0);
-    const auto* W = context->Input<Tensor>(1);
-    uint8_t input_offset = 0;
-    uint8_t filter_offset = 0;
-    if (num_inputs >= 3 && input_defs[2]->Exists()) {
-      const auto* X_Zero_Point = context->Input<Tensor>(2);
-      ORT_ENFORCE(IsScalarOr1ElementVector(X_Zero_Point), "Must be a scalar or 1D tensor of size 1.");
-      input_offset = *static_cast<const uint8_t*>(X_Zero_Point->DataRaw());
-    }
-    if (num_inputs >= 4 && input_defs[3]->Exists()) {
-      const auto* W_Zero_Point = context->Input<Tensor>(3);
-      ORT_ENFORCE(IsScalarOr1ElementVector(W_Zero_Point), "Non per-tensor quantization is not supported now.");
-      filter_offset = *static_cast<const uint8_t*>(W_Zero_Point->DataRaw());
-    }
+  Status ComputeInner(OpKernelContext* context) const;
+};

-    const int64_t N = X->Shape()[0];
-    const int64_t C = X->Shape()[1];
-    const int64_t M = W->Shape()[0];
-    ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W));
+ONNX_OPERATOR_KERNEL_EX(
+    ConvInteger,
+    kOnnxDomain,
+    10,
+    kCpuExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T1", {DataTypeImpl::GetTensorType<uint8_t>(),
+                               DataTypeImpl::GetTensorType<int8_t>()})
+        .TypeConstraint("T2", {DataTypeImpl::GetTensorType<uint8_t>(),
+                               DataTypeImpl::GetTensorType<int8_t>()})
+        .TypeConstraint("T3", DataTypeImpl::GetTensorType<int32_t>()),
+    ConvInteger);

-    TensorShapeVector kernel_shape;
-    ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape));
+template <typename XT, typename WT>
+Status ConvInteger::ComputeInner(OpKernelContext* context) const {
+  const auto input_defs = Node().InputDefs();
+  size_t num_inputs = input_defs.size();
+  const auto* X = context->Input<Tensor>(0);
+  const auto* W = context->Input<Tensor>(1);
+  uint8_t input_offset = 0;
+  uint8_t filter_offset = 0;
+  if (num_inputs >= 3 && input_defs[2]->Exists()) {
+    const auto* X_Zero_Point = context->Input<Tensor>(2);
+    ORT_ENFORCE(IsScalarOr1ElementVector(X_Zero_Point), "Must be a scalar or 1D tensor of size 1.");
+    input_offset = *static_cast<const uint8_t*>(X_Zero_Point->DataRaw());
+  }
+  if (num_inputs >= 4 && input_defs[3]->Exists()) {
+    const auto* W_Zero_Point = context->Input<Tensor>(3);
+    ORT_ENFORCE(IsScalarOr1ElementVector(W_Zero_Point), "Non per-tensor quantization is not supported now.");
+    filter_offset = *static_cast<const uint8_t*>(W_Zero_Point->DataRaw());
+  }

-    ConvPadVector pads(conv_attrs_.pads);
-    if (pads.empty()) {
-      pads.resize(kernel_shape.size() * 2, 0);
-    }
-    TensorShapeVector dilations(conv_attrs_.dilations);
-    if (dilations.empty()) {
-      dilations.resize(kernel_shape.size(), 1);
-    }
-    TensorShapeVector strides(conv_attrs_.strides);
-    if (strides.empty()) {
-      strides.resize(kernel_shape.size(), 1);
-    }
+  const int64_t N = X->Shape()[0];
+  const int64_t C = X->Shape()[1];
+  const int64_t M = W->Shape()[0];
+  ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X, W));

-    TensorShapeVector Y_dims({N, M});
-    TensorShape input_shape = X->Shape().Slice(2);
-    ORT_RETURN_IF_ERROR(conv_attrs_.InferPadsAndOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
-    Tensor* Y = context->Output(0, TensorShape(Y_dims));
-    TensorShape output_shape = Y->Shape().Slice(2);
+  TensorShapeVector kernel_shape;
+  ORT_RETURN_IF_ERROR(conv_attrs_.ComputeKernelShape(W->Shape(), kernel_shape));

-    // Bail out early if one of the dimensions is zero.
-    if (Y->Shape().Size() == 0) {
-      return Status::OK();
-    }
+  ConvPadVector pads(conv_attrs_.pads);
+  if (pads.empty()) {
+    pads.resize(kernel_shape.size() * 2, 0);
+  }
+  TensorShapeVector dilations(conv_attrs_.dilations);
+  if (dilations.empty()) {
+    dilations.resize(kernel_shape.size(), 1);
+  }
+  TensorShapeVector strides(conv_attrs_.strides);
+  if (strides.empty()) {
+    strides.resize(kernel_shape.size(), 1);
+  }

-    const int64_t input_image_size = input_shape.Size();
-    const int64_t output_image_size = output_shape.Size();
-    const int64_t kernel_size = TensorShape(kernel_shape).Size();
-    const int64_t X_offset = C / conv_attrs_.group * input_image_size;
-    const int64_t Y_offset = Y->Shape().Size() / Y->Shape()[0] / conv_attrs_.group;
-    const int64_t W_offset = W->Shape().Size() / conv_attrs_.group;
-    const int64_t kernel_dim = C / conv_attrs_.group * kernel_size;
-    const int64_t col_buffer_size = kernel_dim * output_image_size;
+  TensorShapeVector Y_dims({N, M});
+  TensorShape input_shape = X->Shape().Slice(2);
+  ORT_RETURN_IF_ERROR(conv_attrs_.InferPadsAndOutputShape(input_shape, kernel_shape, strides, dilations, pads, Y_dims));
+  Tensor* Y = context->Output(0, TensorShape(Y_dims));
+  TensorShape output_shape = Y->Shape().Slice(2);

-    const size_t kernel_rank = kernel_shape.size();
+  // Bail out early if one of the dimensions is zero.
+  if (Y->Shape().Size() == 0) {
+    return Status::OK();
+  }

-    BufferUniquePtr col_buffer;
+  const int64_t input_image_size = input_shape.Size();
+  const int64_t output_image_size = output_shape.Size();
+  const int64_t kernel_size = TensorShape(kernel_shape).Size();
+  const int64_t X_offset = C / conv_attrs_.group * input_image_size;
+  const int64_t Y_offset = Y->Shape().Size() / Y->Shape()[0] / conv_attrs_.group;
+  const int64_t W_offset = W->Shape().Size() / conv_attrs_.group;
+  const int64_t kernel_dim = C / conv_attrs_.group * kernel_size;
+  const int64_t col_buffer_size = kernel_dim * output_image_size;

-    // Pointwise convolutions can use the original input tensor in place,
-    // otherwise a temporary buffer is required for the im2col transform.
-    if (kernel_size != 1 || !conv_attrs_.HasStridesOneAndNoPadding()) {
-      AllocatorPtr alloc;
-      ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
+  const size_t kernel_rank = kernel_shape.size();

-      auto* col_data = alloc->Alloc(SafeInt<size_t>(sizeof(uint8_t)) * col_buffer_size);
-      col_buffer = BufferUniquePtr(col_data, BufferDeleter(std::move(alloc)));
-    }
+  BufferUniquePtr col_buffer;

-    auto* col_buffer_data = static_cast<uint8_t*>(col_buffer.get());
-
-    concurrency::ThreadPool* thread_pool = context->GetOperatorThreadPool();
-
-    const auto* Xdata = static_cast<const uint8_t*>(X->DataRaw());
-    const auto* Wdata = static_cast<const uint8_t*>(W->DataRaw());
-    auto* Ydata = Y->MutableData<int32_t>();
-
-    for (int image_id = 0; image_id < N; ++image_id) {
-      for (int group_id = 0; group_id < conv_attrs_.group; ++group_id) {
-        if (col_buffer_data != nullptr) {
-          if (kernel_rank == 2) {
-            math::Im2col<XT, StorageOrder::NCHW>()(
-                reinterpret_cast<const XT*>(Xdata),
-                C / conv_attrs_.group,
-                input_shape[0],
-                input_shape[1],
-                kernel_shape[0],
-                kernel_shape[1],
-                dilations[0],
-                dilations[1],
-                pads[0],
-                pads[1],
-                pads[2],
-                pads[3],
-                strides[0],
-                strides[1],
-                reinterpret_cast<XT*>(col_buffer_data),
-                static_cast<XT>(input_offset));
-          } else {
-            math::Im2col<XT, StorageOrder::NCHW>()(
-                reinterpret_cast<const XT*>(Xdata),
-                input_shape.GetDims().data(),
-                output_shape.GetDims().data(),
-                kernel_dim,
-                kernel_shape.data(),
-                strides.data(),
-                dilations.data(),
-                pads.data(),
-                static_cast<int>(kernel_rank),
-                reinterpret_cast<XT*>(col_buffer_data),
-                false,
-                static_cast<XT>(input_offset));
-          }
-        }
+  // Pointwise convolutions can use the original input tensor in place,
+  // otherwise a temporary buffer is required for the im2col transform.
+  if (kernel_size != 1 || !conv_attrs_.HasStridesOneAndNoPadding()) {
+    AllocatorPtr alloc;
+    ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
+
+    auto* col_data = alloc->Alloc(SafeInt<size_t>(sizeof(uint8_t)) * col_buffer_size);
+    col_buffer = BufferUniquePtr(col_data, BufferDeleter(std::move(alloc)));
+  }

-        MLAS_GEMM_QUANT_SHAPE_PARAMS gemm_shape;
-        gemm_shape.M = static_cast<size_t>(M / conv_attrs_.group);
-        gemm_shape.N = static_cast<size_t>(output_image_size);
-        gemm_shape.K = static_cast<size_t>(kernel_dim);
-        gemm_shape.AIsSigned = W->IsDataType<int8_t>();
-        gemm_shape.BIsSigned = X->IsDataType<int8_t>();
-
-        MLAS_GEMM_QUANT_DATA_PARAMS gemm_params;
-        gemm_params.A = Wdata + group_id * W_offset;
-        gemm_params.lda = static_cast<size_t>(kernel_dim);
-        gemm_params.ZeroPointA = filter_offset;
-        gemm_params.B = (col_buffer_data == nullptr) ? Xdata : col_buffer_data;
-        gemm_params.ldb = static_cast<size_t>(output_image_size);
-        gemm_params.ZeroPointB = &input_offset;
-        gemm_params.C = Ydata;
-        gemm_params.ldc = static_cast<size_t>(output_image_size);
-
-        MlasGemm(gemm_shape, gemm_params, thread_pool);
-
-        Xdata = reinterpret_cast<const uint8_t*>(X_offset + reinterpret_cast<const XT*>(Xdata));
-        Ydata += Y_offset;
+  auto* col_buffer_data = static_cast<uint8_t*>(col_buffer.get());
+
+  concurrency::ThreadPool* thread_pool = context->GetOperatorThreadPool();
+
+  const auto* Xdata = static_cast<const uint8_t*>(X->DataRaw());
+  const auto* Wdata = static_cast<const uint8_t*>(W->DataRaw());
+  auto* Ydata = Y->MutableData<int32_t>();
+
+  for (int image_id = 0; image_id < N; ++image_id) {
+    for (int group_id = 0; group_id < conv_attrs_.group; ++group_id) {
+      if (col_buffer_data != nullptr) {
+        if (kernel_rank == 2) {
+          math::Im2col<XT, StorageOrder::NCHW>()(
+              reinterpret_cast<const XT*>(Xdata),
+              C / conv_attrs_.group,
+              input_shape[0],
+              input_shape[1],
+              kernel_shape[0],
+              kernel_shape[1],
+              dilations[0],
+              dilations[1],
+              pads[0],
+              pads[1],
+              pads[2],
+              pads[3],
+              strides[0],
+              strides[1],
+              reinterpret_cast<XT*>(col_buffer_data),
+              static_cast<XT>(input_offset));
+        } else {
+          math::Im2col<XT, StorageOrder::NCHW>()(
+              reinterpret_cast<const XT*>(Xdata),
+              input_shape.GetDims().data(),
+              output_shape.GetDims().data(),
+              kernel_dim,
+              kernel_shape.data(),
+              strides.data(),
+              dilations.data(),
+              pads.data(),
+              static_cast<int>(kernel_rank),
+              reinterpret_cast<XT*>(col_buffer_data),
+              false,
+              static_cast<XT>(input_offset));
+        }
       }
-    }

-    return Status::OK();
+      MLAS_GEMM_QUANT_SHAPE_PARAMS gemm_shape;
+      gemm_shape.M = static_cast<size_t>(M / conv_attrs_.group);
+      gemm_shape.N = static_cast<size_t>(output_image_size);
+      gemm_shape.K = static_cast<size_t>(kernel_dim);
+      gemm_shape.AIsSigned = W->IsDataType<int8_t>();
+      gemm_shape.BIsSigned = X->IsDataType<int8_t>();
+
+      MLAS_GEMM_QUANT_DATA_PARAMS gemm_params;
+      gemm_params.A = Wdata + group_id * W_offset;
+      gemm_params.lda = static_cast<size_t>(kernel_dim);
+      gemm_params.ZeroPointA = filter_offset;
+      gemm_params.B = (col_buffer_data == nullptr) ? Xdata : col_buffer_data;
+      gemm_params.ldb = static_cast<size_t>(output_image_size);
+      gemm_params.ZeroPointB = &input_offset;
+      gemm_params.C = Ydata;
+      gemm_params.ldc = static_cast<size_t>(output_image_size);
+
+      MlasGemm(gemm_shape, gemm_params, thread_pool);
+
+      Xdata = reinterpret_cast<const uint8_t*>(X_offset + reinterpret_cast<const XT*>(Xdata));
+      Ydata += Y_offset;
+    }
   }
-};

-ONNX_OPERATOR_KERNEL_EX(
-    ConvInteger,
-    kOnnxDomain,
-    10,
-    kCpuExecutionProvider,
-    KernelDefBuilder()
-        .TypeConstraint("T1", {DataTypeImpl::GetTensorType<uint8_t>(),
-                               DataTypeImpl::GetTensorType<int8_t>()})
-        .TypeConstraint("T2", {DataTypeImpl::GetTensorType<uint8_t>(),
-                               DataTypeImpl::GetTensorType<int8_t>()})
-        .TypeConstraint("T3", DataTypeImpl::GetTensorType<int32_t>()),
-    ConvInteger);
+  return Status::OK();
+}

 Status ConvInteger::Compute(OpKernelContext* context) const {
   const auto* X = context->Input<Tensor>(0);
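Note: the hunk only moves the templated `ComputeInner<XT, WT>` out of the class body and past the kernel registration; it does not show how `Compute()` picks the template arguments. As a rough sketch (not taken from this diff; the PR's actual dispatch may differ), `Compute()` can branch on the signedness of the two inputs, which the T1/T2 type constraints in the registration above allow to be either `uint8_t` or `int8_t`:

```cpp
// Hypothetical dispatch sketch -- not part of this PR.
// Selects the ComputeInner<XT, WT> instantiation matching the runtime
// element types of X (input) and W (weights).
Status ConvInteger::Compute(OpKernelContext* context) const {
  const auto* X = context->Input<Tensor>(0);
  const auto* W = context->Input<Tensor>(1);
  if (X->IsDataType<uint8_t>()) {
    return W->IsDataType<uint8_t>() ? ComputeInner<uint8_t, uint8_t>(context)
                                    : ComputeInner<uint8_t, int8_t>(context);
  }
  return W->IsDataType<uint8_t>() ? ComputeInner<int8_t, uint8_t>(context)
                                  : ComputeInner<int8_t, int8_t>(context);
}
```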