Skip to content

Commit 8f6eb0e

Browse files
committed
PR hygiene: remove accidental files; revert unrelated changes; keep only the intended test update
1 parent 52017f6 commit 8f6eb0e

File tree

6 files changed

+10
-188
lines changed

6 files changed

+10
-188
lines changed

.gitignore

-898 Bytes
Binary file not shown.

Issue#26432/chec_pytorch.py

Lines changed: 0 additions & 48 deletions
This file was deleted.

Issue#26432/repro_rmsnorm_scalar_scale.py

Lines changed: 0 additions & 52 deletions
This file was deleted.

lint.json

Lines changed: 0 additions & 14 deletions
This file was deleted.

onnxruntime/core/providers/cpu/reduction/reduction_ops.cc

Lines changed: 10 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#endif
1414
using namespace std;
1515
namespace onnxruntime {
16+
1617
#define REGISTER_UNARY_ELEMENTWISE_KERNEL(x, sinceVersion) \
1718
ONNX_CPU_OPERATOR_TYPED_KERNEL( \
1819
x, \
@@ -759,10 +760,14 @@ bool CommonFastReduceCopy(OpKernelContext* ctx, TensorShapeVector& input_axes, b
759760
} else {
760761
input_axes.clear();
761762
}
762-
// noop_with_empty_axes is handled upstream by ApplyNoopEmptyAxesElementwise().
763-
// Return false for clarity and to prevent unsafe memcpy fallback.
763+
764764
if (input_axes.empty() && noop_with_empty_axes) {
765-
return false;
765+
const Tensor* input = ctx->Input<Tensor>(0);
766+
auto* output = ctx->Output(0, input->Shape());
767+
memcpy(output->MutableDataRaw(),
768+
input->DataRaw(),
769+
input->SizeInBytes());
770+
return true;
766771
}
767772
}
768773
return false;
@@ -795,6 +800,7 @@ bool CommonFastReduceSwitch(OpKernelContext* ctx,
795800
fast_kind = OptimizeShapeForFastReduce(
796801
reduced_dims, input_axes.empty() ? axes_ : input_axes,
797802
fast_shape, output_shape, fast_axes, keepdims_ != 0, noop_with_empty_axes);
803+
798804
if (which_fast_reduce != FastReduceKind::kNone) {
799805
if (IsFastReduceKindAvailable(fast_kind, which_fast_reduce)) {
800806
Tensor* output = ctx->Output(0, output_shape);
@@ -914,30 +920,6 @@ bool check_and_reduce_empty_set_input(OpKernelContext* ctx, const gsl::span<cons
914920
return true;
915921
}
916922

917-
template <typename AGG>
918-
inline void ApplyNoopEmptyAxesElementwise(OpKernelContext* ctx) {
919-
const Tensor* X = ctx->Input<Tensor>(0);
920-
const auto& shape = X->Shape();
921-
Tensor* Y = ctx->Output(0, shape);
922-
923-
if constexpr (!ReduceAggTraits<AGG>::kHasPreOp && !ReduceAggTraits<AGG>::kHasPostOp) {
924-
std::memcpy(Y->MutableDataRaw(), X->DataRaw(), X->SizeInBytes());
925-
926-
} else {
927-
using Tin = typename AGG::input_type;
928-
using Tacc = typename AGG::value_type;
929-
const Tin* x = X->Data<Tin>();
930-
Tacc* y = Y->MutableData<Tacc>();
931-
const int64_t n = shape.Size();
932-
933-
for (int64_t i = 0; i < n; ++i) {
934-
AGG agg(1, x[i]);
935-
agg.update(x[i]);
936-
y[i] = agg.get_value();
937-
}
938-
}
939-
}
940-
941923
template <typename AGG>
942924
void CommonReduce1Loop(OpKernelContext* ctx,
943925
const gsl::span<const int64_t>& axes_, int64_t keepdims_,
@@ -946,11 +928,6 @@ void CommonReduce1Loop(OpKernelContext* ctx,
946928
return;
947929
}
948930

949-
if (axes_.empty() && noop_with_empty_axes) {
950-
ApplyNoopEmptyAxesElementwise<AGG>(ctx);
951-
return;
952-
}
953-
954931
FastReduceKind fast_kind;
955932
TensorShapeVector fast_shape;
956933
TensorShapeVector output_shape;
@@ -962,7 +939,6 @@ void CommonReduce1Loop(OpKernelContext* ctx,
962939

963940
const Tensor* input = ctx->Input<Tensor>(0);
964941
Tensor* output = ctx->Output(0, output_shape);
965-
966942
if (fast_kind == FastReduceKind::kEmpty) {
967943
const TensorShape& input_shape = input->Shape();
968944
if (input_shape.Size() == 1) {
@@ -989,11 +965,6 @@ void CommonReduce2Loops(OpKernelContext* ctx,
989965
return;
990966
}
991967

992-
if (axes_.empty() && noop_with_empty_axes) {
993-
ApplyNoopEmptyAxesElementwise<AGG>(ctx);
994-
return;
995-
}
996-
997968
FastReduceKind fast_kind;
998969
TensorShapeVector fast_shape, output_shape, fast_axes;
999970
if (CommonFastReduce<AGG>(ctx, axes_, keepdims_, noop_with_empty_axes,
@@ -1017,6 +988,7 @@ void CommonReduce2Loops(OpKernelContext* ctx,
1017988
}
1018989
return;
1019990
}
991+
1020992
ResultsNoTransposePrepareForReduce last_results;
1021993
NoTransposeReduce2Loops<AGG>(output, fast_shape, *input, fast_axes, ctx->GetOperatorThreadPool(), last_results);
1022994
}

onnxruntime/core/providers/cpu/reduction/reduction_ops.h

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -784,42 +784,6 @@ class ReduceAggregatorLogSumExp : public ReduceAggregator<T, T> {
784784
}
785785
};
786786

787-
template <typename AGG>
788-
struct ReduceAggTraits {
789-
static constexpr bool kHasPreOp = false;
790-
static constexpr bool kHasPostOp = false;
791-
};
792-
793-
template <typename T>
794-
struct ReduceAggTraits<ReduceAggregatorL1<T>> {
795-
static constexpr bool kHasPreOp = true;
796-
static constexpr bool kHasPostOp = false;
797-
};
798-
799-
template <typename T>
800-
struct ReduceAggTraits<ReduceAggregatorL2<T>> {
801-
static constexpr bool kHasPreOp = true;
802-
static constexpr bool kHasPostOp = true;
803-
};
804-
805-
template <typename T, typename TVAL>
806-
struct ReduceAggTraits<ReduceAggregatorSumSquare<T, TVAL>> {
807-
static constexpr bool kHasPreOp = true;
808-
static constexpr bool kHasPostOp = false;
809-
};
810-
811-
template <typename T>
812-
struct ReduceAggTraits<ReduceAggregatorLogSum<T>> {
813-
static constexpr bool kHasPreOp = false;
814-
static constexpr bool kHasPostOp = true;
815-
};
816-
817-
template <typename T>
818-
struct ReduceAggTraits<ReduceAggregatorLogSumExp<T>> {
819-
static constexpr bool kHasPreOp = true;
820-
static constexpr bool kHasPostOp = true;
821-
};
822-
823787
void NoTransposePrepareForReduce(const TensorShape& new_input_shape,
824788
gsl::span<const int64_t> reduced_axes,
825789
ResultsNoTransposePrepareForReduce& results);

0 commit comments

Comments
 (0)