diff --git a/src/FsMath/Matrix.fs b/src/FsMath/Matrix.fs index 62b2f23..7db50ce 100644 --- a/src/FsMath/Matrix.fs +++ b/src/FsMath/Matrix.fs @@ -798,10 +798,13 @@ type Matrix<'T when 'T :> Numerics.INumber<'T> Array.init m.NumRows (fun i -> Matrix.getRow i m) + /// /// Gets a specific column of the matrix as a vector. - static member inline getCol<'T + /// Optimized with loop unrolling to improve cache utilization and reduce loop overhead. + /// + static member inline getCol<'T when 'T :> Numerics.INumber<'T> - and 'T : struct + and 'T : struct and 'T : (new : unit -> 'T) and 'T :> ValueType> (j: int) @@ -810,30 +813,49 @@ type Matrix<'T when 'T :> Numerics.INumber<'T> if j < 0 || j >= m.NumCols then invalidArg "j" $"Column index out of bounds: {j}" - let result = Array.zeroCreate<'T> m.NumRows + let numRows = m.NumRows + let numCols = m.NumCols + let result = Array.zeroCreate<'T> numRows + let data = m.Data + + // Loop unrolling by 4 for better instruction-level parallelism + // This reduces loop overhead and allows the CPU to prefetch more efficiently + let unrollFactor = 4 + let unrolledEnd = (numRows / unrollFactor) * unrollFactor + let stride = numCols + let mutable offset = j - for i = 0 to m.NumRows - 1 do - result.[i] <- m.Data.[offset] - offset <- offset + m.NumCols + let mutable i = 0 + + // Unrolled loop: process 4 elements per iteration + while i < unrolledEnd do + result.[i] <- data.[offset] + result.[i + 1] <- data.[offset + stride] + result.[i + 2] <- data.[offset + stride * 2] + result.[i + 3] <- data.[offset + stride * 3] + offset <- offset + stride * unrollFactor + i <- i + unrollFactor + + // Handle remaining elements + while i < numRows do + result.[i] <- data.[offset] + offset <- offset + stride + i <- i + 1 result + /// /// Gets all columns of the matrix as an array of vectors. - static member inline getCols<'T + /// Uses the optimized getCol function for each column. + /// + static member inline getCols<'T when 'T :> Numerics.INumber<'T> - and 'T : struct + and 'T : struct and 'T : (new : unit -> 'T) and 'T :> ValueType> (m: Matrix<'T>) : Vector<'T>[] = - Array.init m.NumCols (fun j -> - let col = Array.zeroCreate<'T> m.NumRows - let mutable offset = j - for i = 0 to m.NumRows - 1 do - col.[i] <- m.Data.[offset] - offset <- offset + m.NumCols - col - ) + Array.init m.NumCols (fun j -> Matrix.getCol j m) /// Sets row of this matrix to the contents of .