@@ -168,12 +168,41 @@ where
168168
169169 // Horner's method for polynomial evaluation with cost O(n).
170170 fn horner_evaluate ( & self , point : & F ) -> T {
171- self . coeffs
172- . iter ( )
173- . rfold ( T :: zero ( ) , move |mut result, coeff| {
174- result *= * point;
175- result + * coeff
176- } )
171+ // For very small polynomials, use the original implementation
172+ if self . coeffs . len ( ) < 16 {
173+ return self . coeffs
174+ . iter ( )
175+ . rfold ( T :: zero ( ) , move |mut result, coeff| {
176+ result *= * point;
177+ result + * coeff
178+ } ) ;
179+ }
180+
181+ // For larger polynomials, use a more cache-friendly approach
182+ // by processing chunks of the polynomial
183+ let chunk_size = 8 ;
184+ let mut results = Vec :: with_capacity ( ( self . coeffs . len ( ) + chunk_size - 1 ) / chunk_size) ;
185+
186+ // Process each chunk separately
187+ for chunk in self . coeffs . chunks ( chunk_size) {
188+ let mut chunk_result = T :: zero ( ) ;
189+ for coeff in chunk. iter ( ) . rev ( ) {
190+ chunk_result *= * point;
191+ chunk_result = chunk_result + * coeff;
192+ }
193+ results. push ( chunk_result) ;
194+ }
195+
196+ // Combine chunk results
197+ let point_pow_chunk = point. pow ( [ chunk_size as u64 ] ) ;
198+ let mut final_result = * results. last ( ) . unwrap_or ( & T :: zero ( ) ) ;
199+
200+ for & chunk_result in results. iter ( ) . rev ( ) . skip ( 1 ) {
201+ final_result *= point_pow_chunk;
202+ final_result = final_result + chunk_result;
203+ }
204+
205+ final_result
177206 }
178207}
179208
0 commit comments