pid: optimize calculations with pre-computed gains and vec3 operations

bkleiner · bkleiner · commit 566b3b172241 · 2025-06-25T20:37:35.000+02:00
diff --git a/src/core/looptime.c b/src/core/looptime.c
@@ -87,8 +87,8 @@ void looptime_update() {
 
   state.looptime_us = CYCLES_TO_US(time_cycles() - last_loop_cycles);
   state.looptime = state.looptime_us * 1e-6f;
-  // 0.0032f is there for legacy purposes, should be 0.001f = looptime
-  state.timefactor = 0.0032f / state.looptime;
+  // looptime_inverse is the loop frequency (1/looptime)
+  state.looptime_inverse = 1.0f / state.looptime;
   state.loop_counter++;
 
   last_loop_cycles = time_cycles();
diff --git a/src/flight/angle_pid.c b/src/flight/angle_pid.c
@@ -17,10 +17,10 @@ float angle_pid(int x) {
   const float angle_error_abs = fabsf(state.angle_error.axis[x]);
 
   const float small_angle = (1 - angle_error_abs) * state.angle_error.axis[x] * profile.pid.small_angle.kp                                               // P term weighted
-                            + ((state.angle_error.axis[x] - lasterror.axis[x]) * profile.pid.small_angle.kd * (1 - angle_error_abs) * state.timefactor); // D term weighted
+                            + ((state.angle_error.axis[x] - lasterror.axis[x]) * profile.pid.small_angle.kd * (1 - angle_error_abs) * state.looptime_inverse); // D term weighted
 
   const float big_angle = angle_error_abs * state.angle_error.axis[x] * profile.pid.big_angle.kp                                               // P term weighted
-                          + ((state.angle_error.axis[x] - lasterror.axis[x]) * profile.pid.big_angle.kd * angle_error_abs * state.timefactor); // D term weighted
+                          + ((state.angle_error.axis[x] - lasterror.axis[x]) * profile.pid.big_angle.kd * angle_error_abs * state.looptime_inverse); // D term weighted
 
   lasterror.axis[x] = state.angle_error.axis[x];
 
diff --git a/src/flight/control.h b/src/flight/control.h
@@ -44,7 +44,7 @@ typedef struct {
   float looptime;            // looptime in seconds
   float looptime_us;         // looptime in us
   float looptime_autodetect; // desired looptime in us
-  float timefactor;          // timefactor for pid calc
+  float looptime_inverse;    // 1/looptime for derivative calculations
   uint32_t loop_counter;     // number of loops ran
 
   float uptime;      // running sum of looptimes
@@ -116,7 +116,7 @@ typedef struct {
   MEMBER(looptime, float)                     \
   MEMBER(looptime_us, float)                  \
   MEMBER(looptime_autodetect, float)          \
-  MEMBER(timefactor, float)                   \
+  MEMBER(looptime_inverse, float)            \
   MEMBER(loop_counter, uint32_t)              \
   MEMBER(uptime, float)                       \
   MEMBER(armtime, float)                      \
diff --git a/src/flight/pid.c b/src/flight/pid.c
@@ -11,6 +11,7 @@
 #include "flight/filter.h"
 #include "io/led.h"
 #include "util/util.h"
+#include "util/vector.h"
 
 #define PID_SIZE 3
 
@@ -21,20 +22,19 @@
 #define RELAX_FACTOR_YAW (RELAX_FACTOR_YAW_DEG * DEGTORAD)
 
 /// output limit
-static const float out_limit[PID_SIZE] = {0.8, 0.8, 0.6};
+static const vec3_t out_limit = {.roll = 0.8f, .pitch = 0.8f, .yaw = 0.6f};
 
 // limit of integral term (abs)
-static const float integral_limit[PID_SIZE] = {0.8, 0.8, 0.6};
+static const vec3_t integral_limit = {.roll = 0.8f, .pitch = 0.8f, .yaw = 0.6f};
 
-static const float pid_scales[PID_SIZE][PID_SIZE] = {
-    // roll, pitch, yaw
-    {1.0f / 628.0f, 1.0f / 628.0f, 1.0f / 314.0f}, // kp
-    {1.0f / 50.0f, 1.0f / 50.0f, 1.0f / 50.0f},    // ki
-    {1.0f / 120.0f, 1.0f / 120.0f, 1.0f / 120.0f}, // kd
+static const vec3_t pid_scales[PID_SIZE] = {
+    {.roll = 1.0f / 628.0f, .pitch = 1.0f / 628.0f, .yaw = 1.0f / 314.0f}, // kp
+    {.roll = 1.0f / 100.0f, .pitch = 1.0f / 100.0f, .yaw = 1.0f / 100.0f}, // ki - includes historical 0.5x scaling from Silverware
+    {.roll = 1.0f / 37500.0f, .pitch = 1.0f / 37500.0f, .yaw = 1.0f / 37500.0f},    // kd - includes 0.0032 constant (0.0032 / 120 = 1 / 37500)
 };
 
-static float lastrate[PID_SIZE] = {0, 0, 0};
-static float lastsetpoint[PID_SIZE] = {0, 0, 0};
+static vec3_t lastrate = {.roll = 0, .pitch = 0, .yaw = 0};
+static vec3_t lastsetpoint = {.roll = 0, .pitch = 0, .yaw = 0};
 
 static vec3_t ierror;
 static vec3_t last_error;
@@ -58,31 +58,41 @@ void pid_init() {
 }
 
 // (iwindup = 0  windup is not allowed)   (iwindup = 1 windup is allowed)
-static inline float pid_compute_iterm_windup(uint8_t x, float pid_output) {
-  if ((pid_output >= out_limit[x]) && (state.error.axis[x] > 0)) {
-    return 0.0f;
-  }
-  if ((pid_output <= -out_limit[x]) && (state.error.axis[x] < 0)) {
-    return 0.0f;
-  }
+static inline vec3_t pid_compute_iterm_windup_vec(const vec3_t *pid_output) {
+  vec3_t windup = {.roll = 1.0f, .pitch = 1.0f, .yaw = 1.0f};
 
-#ifdef ITERM_RELAX //  Roll - Pitch  Setpoint based I term relax method
-  static float avg_setpoint[3] = {0, 0, 0};
-  if (x < 2) {
-    lpf(&avg_setpoint[x], state.setpoint.axis[x], lpfcalc(state.looptime, 1.0f / (float)RELAX_FREQUENCY_HZ)); // 11 Hz filter
-    const float hpfSetpoint = fabsf(state.setpoint.axis[x] - avg_setpoint[x]);
-    return max(1.0f - hpfSetpoint / RELAX_FACTOR, 0.0f);
-  }
+  for (uint8_t x = 0; x < PID_SIZE; x++) {
+    if ((pid_output->axis[x] >= out_limit.axis[x]) && (state.error.axis[x] > 0)) {
+      windup.axis[x] = 0.0f;
+    } else if ((pid_output->axis[x] <= -out_limit.axis[x]) && (state.error.axis[x] < 0)) {
+      windup.axis[x] = 0.0f;
+    }
+#ifdef ITERM_RELAX
+    else {
+      static vec3_t avg_setpoint = {.roll = 0, .pitch = 0, .yaw = 0};
+      static float lpf_coeff = 0;
+      static float lpf_coeff_yaw = 0;
+      if (lpf_coeff == 0) {
+        lpf_coeff = lpfcalc(state.looptime, 1.0f / (float)RELAX_FREQUENCY_HZ);
+        lpf_coeff_yaw = lpfcalc(state.looptime, 1.0f / (float)RELAX_FREQUENCY_HZ_YAW);
+      }
+      if (x < 2) {
+        lpf(&avg_setpoint.axis[x], state.setpoint.axis[x], lpf_coeff);
+        const float hpfSetpoint = fabsf(state.setpoint.axis[x] - avg_setpoint.axis[x]);
+        windup.axis[x] = max(1.0f - hpfSetpoint / RELAX_FACTOR, 0.0f);
+      }
 #ifdef ITERM_RELAX_YAW
-  else {                                                                                                          // axis is yaw
-    lpf(&avg_setpoint[x], state.setpoint.axis[x], lpfcalc(state.looptime, 1.0f / (float)RELAX_FREQUENCY_HZ_YAW)); // 25 Hz filter
-    const float hpfSetpoint = fabsf(state.setpoint.axis[x] - avg_setpoint[x]);
-    return max(1.0f - hpfSetpoint / RELAX_FACTOR_YAW, 0.0f);
-  }
+      else {
+        lpf(&avg_setpoint.axis[x], state.setpoint.axis[x], lpf_coeff_yaw);
+        const float hpfSetpoint = fabsf(state.setpoint.axis[x] - avg_setpoint.axis[x]);
+        windup.axis[x] = max(1.0f - hpfSetpoint / RELAX_FACTOR_YAW, 0.0f);
+      }
 #endif
+    }
 #endif
+  }
 
-  return 1.0f;
+  return windup;
 }
 
 static inline float pid_filter_dterm(uint8_t x, float dterm) {
@@ -92,26 +102,29 @@ static inline float pid_filter_dterm(uint8_t x, float dterm) {
   return dterm;
 }
 
-static inline bool pid_should_enable_iterm(uint8_t x) {
+static inline vec3_t pid_should_enable_iterm_vec() {
   static bool stick_movement = false;
   if (!flags.arm_state) {
     // disarmed, disable, flag no movement
     stick_movement = false;
-    return false;
+    return (vec3_t){{0.0f, 0.0f, 0.0f}};
   }
   if (flags.in_air) {
     // in-air, enable, flag no movement
     stick_movement = false;
-    return true;
+    return (vec3_t){{1.0f, 1.0f, 1.0f}};
   }
 
-  if (fabsf(state.setpoint.axis[x]) > 0.1f) {
-    // record first stick crossing
+  // Check for stick movement on any axis
+  if (fabsf(state.setpoint.roll) > 0.1f ||
+      fabsf(state.setpoint.pitch) > 0.1f ||
+      fabsf(state.setpoint.yaw) > 0.1f) {
     stick_movement = true;
   }
 
-  // enable if we recored stick movement previously
-  return stick_movement;
+  // enable if we recorded stick movement previously
+  const float enable = stick_movement ? 1.0f : 0.0f;
+  return (vec3_t){{enable, enable, enable}};
 }
 
 static inline float pid_voltage_compensation() {
@@ -159,30 +172,36 @@ void pid_calc() {
   // rotates errors
   ierror = vec3_rotate(ierror, state.gyro_delta_angle);
 
-#pragma GCC unroll 3
-  for (uint8_t x = 0; x < PID_SIZE; x++) {
-    const float current_kp = profile_current_pid_rates()->kp.axis[x] * pid_scales[0][x];
-    const float current_ki = profile_current_pid_rates()->ki.axis[x] * pid_scales[1][x];
-    const float current_kd = profile_current_pid_rates()->kd.axis[x] * pid_scales[2][x];
+  // Calculate deltas for derivatives
+  const vec3_t setpoint_delta = vec3_sub(state.setpoint, lastsetpoint);
+  const vec3_t gyro_delta = vec3_sub(state.gyro, lastrate);
 
-    // P term
-    state.pid_p_term.axis[x] = state.error.axis[x] * current_kp;
+  // Pre-calculate all PID gains using vec3 operations
+  const pid_rate_t *rates = profile_current_pid_rates();
+  const vec3_t current_kp = vec3_mul(vec3_mul_elem(rates->kp, pid_scales[0]), v_compensation);
 
-    // Pid Voltage Comp applied to P term only
-    state.pid_p_term.axis[x] *= v_compensation;
+  // Pre-calculate common terms
+  const float ki_looptime = state.looptime * (1.0f / 3.0f); // Simpson's rule constant * looptime
 
-    // I term
-    const float iterm_windup = pid_compute_iterm_windup(x, pid_output.axis[x]);
-    if (!pid_should_enable_iterm(x)) {
-      // wind down integral while we are still on ground and we do not get any input from the sticks
-      ierror.axis[x] *= 0.98f;
-    }
-    // SIMPSON_RULE_INTEGRAL
-    // assuming similar time intervals
-    ierror.axis[x] = ierror.axis[x] + 0.5f * (1.0f / 3.0f) * (last_error2.axis[x] + 4 * last_error.axis[x] + state.error.axis[x]) * current_ki * iterm_windup * state.looptime;
-    ierror.axis[x] = constrain(ierror.axis[x], -integral_limit[x], integral_limit[x]);
-    last_error2.axis[x] = last_error.axis[x];
-    last_error.axis[x] = state.error.axis[x];
+  // Pre-multiply Ki and Kd with their time factors
+  const vec3_t current_ki = vec3_mul(vec3_mul_elem(rates->ki, pid_scales[1]), ki_looptime);
+  const vec3_t current_kd = vec3_mul(vec3_mul_elem(rates->kd, pid_scales[2]), state.looptime_inverse);
+  const vec3_t iterm_enable = pid_should_enable_iterm_vec();
+  const vec3_t iterm_windup = pid_compute_iterm_windup_vec(&pid_output);
+  const bool rx_filter_enabled = state.rx_filter_hz > 0.1f;
+
+#pragma GCC unroll 3
+#pragma GCC ivdep // no loop dependencies, safe to vectorize
+  for (uint8_t x = 0; x < PID_SIZE; x++) {
+    // P term (voltage compensation already applied to current_kp)
+    state.pid_p_term.axis[x] = state.error.axis[x] * current_kp.axis[x];
+
+    // I term - combine decay and simpson integration
+    const float simpson_sum = last_error2.axis[x] + 4.0f * last_error.axis[x] + state.error.axis[x];
+    const float ierror_delta = simpson_sum * current_ki.axis[x] * iterm_windup.axis[x];
+    // If iterm disabled, decay by 0.98, otherwise add the delta
+    ierror.axis[x] = iterm_enable.axis[x] ? (ierror.axis[x] + ierror_delta) : (ierror.axis[x] * 0.98f);
+    ierror.axis[x] = constrain(ierror.axis[x], -integral_limit.axis[x], integral_limit.axis[x]);
 
     state.pid_i_term.axis[x] = ierror.axis[x];
 
@@ -194,19 +213,23 @@ void pid_calc() {
       transition_setpoint_weight = (fabsf(state.rx_filtered.axis[x]) * (stick_transition[x] / stick_accelerator[x])) + (1 - stick_transition[x]);
     }
 
-    float setpoint_derivative = (state.setpoint.axis[x] - lastsetpoint[x]) * current_kd * state.timefactor;
-    if (state.rx_filter_hz > 0.1f) {
+    float setpoint_derivative = setpoint_delta.axis[x] * current_kd.axis[x];
+    if (rx_filter_enabled) {
       setpoint_derivative = filter_lp_pt1_step(&rx_filter, &rx_filter_state[x], setpoint_derivative);
     }
-    lastsetpoint[x] = state.setpoint.axis[x];
 
-    const float gyro_derivative = (state.gyro.axis[x] - lastrate[x]) * current_kd * state.timefactor * tda_compensation;
-    lastrate[x] = state.gyro.axis[x];
+    const float gyro_derivative = gyro_delta.axis[x] * current_kd.axis[x] * tda_compensation;
 
     const float dterm = (setpoint_derivative * stick_accelerator[x] * transition_setpoint_weight) - (gyro_derivative);
     state.pid_d_term.axis[x] = pid_filter_dterm(x, dterm);
 
     state.pidoutput.axis[x] = pid_output.axis[x] = state.pid_p_term.axis[x] + state.pid_i_term.axis[x] + state.pid_d_term.axis[x];
-    state.pidoutput.axis[x] = constrain(state.pidoutput.axis[x], -out_limit[x], out_limit[x]);
+    state.pidoutput.axis[x] = constrain(state.pidoutput.axis[x], -out_limit.axis[x], out_limit.axis[x]);
   }
+
+  // Update history
+  last_error2 = last_error;
+  last_error = state.error;
+  lastsetpoint = state.setpoint;
+  lastrate = state.gyro;
 }
diff --git a/src/util/vector.h b/src/util/vector.h
@@ -31,7 +31,41 @@ cbor_result_t cbor_decode_compact_vec3_t(cbor_value_t *dec, compact_vec3_t *vec)
 void vec3_from_array(vec3_t *out, float *in);
 void vec3_compress(compact_vec3_t *out, vec3_t *in, float scale);
 
-// Rodrigues' rotation formula, used in hot paths, lets inline
+// Helper functions for vec3_rotate
+static inline vec3_t vec3_add(vec3_t a, vec3_t b) {
+  return (vec3_t){{a.axis[0] + b.axis[0],
+                   a.axis[1] + b.axis[1],
+                   a.axis[2] + b.axis[2]}};
+}
+
+static inline vec3_t vec3_sub(vec3_t a, vec3_t b) {
+  return (vec3_t){{a.axis[0] - b.axis[0],
+                   a.axis[1] - b.axis[1],
+                   a.axis[2] - b.axis[2]}};
+}
+
+static inline vec3_t vec3_mul(vec3_t v, float s) {
+  return (vec3_t){{v.axis[0] * s,
+                   v.axis[1] * s,
+                   v.axis[2] * s}};
+}
+
+static inline vec3_t vec3_mul_elem(vec3_t a, vec3_t b) {
+  return (vec3_t){{a.axis[0] * b.axis[0],
+                   a.axis[1] * b.axis[1],
+                   a.axis[2] * b.axis[2]}};
+}
+
+static inline float vec3_dot(vec3_t a, vec3_t b) {
+  return a.axis[0] * b.axis[0] + a.axis[1] * b.axis[1] + a.axis[2] * b.axis[2];
+}
+
+static inline vec3_t vec3_cross(vec3_t a, vec3_t b) {
+  return (vec3_t){{a.axis[1] * b.axis[2] - a.axis[2] * b.axis[1],
+                   a.axis[2] * b.axis[0] - a.axis[0] * b.axis[2],
+                   a.axis[0] * b.axis[1] - a.axis[1] * b.axis[0]}};
+}
+
 static inline vec3_t vec3_rotate(const vec3_t vec, const vec3_t rot) {
   return (vec3_t){{
       vec.axis[0] - vec.axis[1] * rot.axis[2] + vec.axis[2] * rot.axis[1],
diff --git a/test/test_native/test_pid.c b/test/test_native/test_pid.c
@@ -41,6 +41,7 @@ static void pid_setUp(void) {
 
   // Initialize control state
   state.looptime = 0.000125f; // 8kHz
+  state.looptime_inverse = 1.0f / state.looptime; // 8000Hz
   state.vbat_filtered = 4.0f;
   state.vbat_compensated = 1.0f;
 
@@ -117,8 +118,8 @@ void test_pid_derivative_calculation(void) {
   profile.pid.stick_rates[0].accelerator.roll = 1.0f;
   profile.pid.stick_rates[0].transition.roll = 1.0f;
   
-  // Initialize state.timefactor (needed for D-term)
-  state.timefactor = 0.0032f / state.looptime; // Original timefactor calculation
+  // Initialize state.looptime_inverse (needed for D-term)
+  state.looptime_inverse = 1.0f / state.looptime;
   
   // Initialize with no gyro rate
   state.gyro.roll = 0.0f;
@@ -147,8 +148,8 @@ void test_pid_dterm_setpoint_response(void) {
   profile.pid.stick_rates[0].accelerator.roll = 1.0f;
   profile.pid.stick_rates[0].transition.roll = 0.0f; // No transition weighting
   
-  // Initialize state.timefactor
-  state.timefactor = 0.0032f / state.looptime;
+  // Initialize state.looptime_inverse
+  state.looptime_inverse = 1.0f / state.looptime;
   
   // Set some stick input so transition weight isn't 0
   state.rx_filtered.roll = 0.0f; // No stick input
@@ -179,8 +180,8 @@ void test_pid_dterm_combined_response(void) {
   profile.pid.stick_rates[0].accelerator.roll = 1.0f;
   profile.pid.stick_rates[0].transition.roll = 0.0f;
   
-  // Initialize state.timefactor
-  state.timefactor = 0.0032f / state.looptime;
+  // Initialize state.looptime_inverse
+  state.looptime_inverse = 1.0f / state.looptime;
   
   // Initialize
   state.gyro.roll = 0.0f;
@@ -210,8 +211,8 @@ void test_pid_dterm_stick_weighting(void) {
   profile.pid.stick_rates[0].accelerator.roll = 2.0f; // Higher than 1
   profile.pid.stick_rates[0].transition.roll = 0.0f; // Ensure setpoint has effect
   
-  // Initialize state.timefactor
-  state.timefactor = 0.0032f / state.looptime;
+  // Initialize state.looptime_inverse
+  state.looptime_inverse = 1.0f / state.looptime;
   
   // Set some stick input
   state.rx_filtered.roll = 0.5f;