From: Joseph Coffland <joseph@cauldrondevelopment.com>
Date: Wed, 21 Sep 2016 19:17:19 +0000 (-0700)
Subject: Use fast invsqrt() function
X-Git-Url: https://git.buildbotics.com/?a=commitdiff_plain;h=d4a780f5cd85a0aa06350b0751b168bd20619a29;p=bbctrl-firmware

Use fast invsqrt() function
---

diff --git a/Makefile b/Makefile
index a4e91b6..e03a917 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ COMMON = -mmcu=$(MCU)
 
 CFLAGS += $(COMMON)
 CFLAGS += -Wall -Werror # -Wno-error=unused-function
+CFLAGS += -Wno-error=strict-aliasing # for _invsqrt
 CFLAGS += -gdwarf-2 -std=gnu99 -DF_CPU=$(CLOCK)UL -O3 -funroll-loops
 CFLAGS += -funsigned-bitfields -fpack-struct -fshort-enums -funsigned-char
 CFLAGS += -MD -MP -MT $@ -MF build/dep/$(@F).d
diff --git a/src/plan/buffer.h b/src/plan/buffer.h
index 4230bcc..8554fdb 100644
--- a/src/plan/buffer.h
+++ b/src/plan/buffer.h
@@ -81,7 +81,6 @@ typedef struct mp_buffer_t {      // See Planning Velocity Notes
   float delta_vmax;               // max velocity difference for this move
 
   float jerk;                     // maximum linear jerk term for this move
-  float recip_jerk;               // 1/Jm used for planning (computed & cached)
   float cbrt_jerk;                // cube root of Jm (computed & cached)
 } mp_buffer_t;
 
diff --git a/src/plan/exec.c b/src/plan/exec.c
index 4049ed8..887aab4 100644
--- a/src/plan/exec.c
+++ b/src/plan/exec.c
@@ -58,13 +58,13 @@ typedef struct {
   float cruise_velocity;
   float exit_velocity;
 
-  float segments;           // number of segments in line or arc
+  float segments;           // number of segments in line
   uint32_t segment_count;   // count of running segments
-  float segment_velocity;   // computed velocity for aline segment
-  float segment_time;       // actual time increment per aline segment
+  float segment_velocity;   // computed velocity for segment
+  float segment_time;       // actual time increment per segment
   forward_dif_t fdif;       // forward difference levels
   bool hold_planned;        // true when a feedhold has been planned
-  move_section_t section;   // what section is the move in?
+  move_section_t section;   // current move section
   bool section_new;         // true if it's a new section
 } mp_exec_t;
 
@@ -214,8 +214,7 @@ static void _plan_hold() {
   // Compute next_segment velocity, velocity left to shed to brake to zero
   float braking_velocity = _compute_next_segment_velocity();
   // Distance to brake to zero from braking_velocity, bf is OK to use here
-  float braking_length =
-    mp_get_target_length(braking_velocity, 0, bf->recip_jerk);
+  float braking_length = mp_get_target_length(braking_velocity, 0, bf->jerk);
 
   // Hack to prevent Case 2 moves for perfect-fit decels.  Happens when homing.
   if (available_length < braking_length && fp_ZERO(bf->exit_velocity))
diff --git a/src/plan/jog.c b/src/plan/jog.c
index 6606441..8a7f297 100644
--- a/src/plan/jog.c
+++ b/src/plan/jog.c
@@ -101,7 +101,7 @@ static stat_t _exec_jog(mp_buffer_t *bf) {
         float jerk = axes[axis].jerk_max * JERK_MULTIPLIER;
 
         // Compute length to velocity given max jerk
-        float length = mp_get_target_length(Vi, Vt, 1 / (jerk * JOG_JERK_MULT));
+        float length = mp_get_target_length(Vi, Vt, jerk * JOG_JERK_MULT);
 
         // Compute move time
         float move_time = 2 * length / (Vi + Vt);
diff --git a/src/plan/line.c b/src/plan/line.c
index c2ca3d0..ec64561 100644
--- a/src/plan/line.c
+++ b/src/plan/line.c
@@ -233,16 +233,13 @@ static float _calc_jerk(const float axis_square[], const float unit[]) {
 /// Compute cached jerk terms used by planning
 static void _calc_and_cache_jerk_values(mp_buffer_t *bf) {
   static float jerk = 0;
-  static float recip_jerk = 0;
   static float cbrt_jerk = 0;
 
   if (JERK_MATCH_PRECISION < fabs(bf->jerk - jerk)) { // Tolerance comparison
     jerk = bf->jerk;
-    recip_jerk = 1 / bf->jerk;
     cbrt_jerk = cbrt(bf->jerk);
   }
 
-  bf->recip_jerk = recip_jerk;
   bf->cbrt_jerk = cbrt_jerk;
 }
 
diff --git a/src/plan/planner.c b/src/plan/planner.c
index de21961..d10b677 100644
--- a/src/plan/planner.c
+++ b/src/plan/planner.c
@@ -308,7 +308,7 @@ void mp_calculate_trapezoid(mp_buffer_t *bf) {
   //   MIN_BODY_LENGTH
   bf->body_length = 0;
   float minimum_length =
-    mp_get_target_length(bf->entry_velocity, bf->exit_velocity, bf->recip_jerk);
+    mp_get_target_length(bf->entry_velocity, bf->exit_velocity, bf->jerk);
 
   // head-only & tail-only cases
   if (bf->length <= (minimum_length + MIN_BODY_LENGTH)) {
@@ -343,11 +343,9 @@ void mp_calculate_trapezoid(mp_buffer_t *bf) {
 
   // Set head and tail lengths for evaluating the next cases
   bf->head_length =
-    mp_get_target_length(bf->entry_velocity, bf->cruise_velocity,
-                         bf->recip_jerk);
+    mp_get_target_length(bf->entry_velocity, bf->cruise_velocity, bf->jerk);
   bf->tail_length =
-    mp_get_target_length(bf->exit_velocity, bf->cruise_velocity,
-                         bf->recip_jerk);
+    mp_get_target_length(bf->exit_velocity, bf->cruise_velocity, bf->jerk);
   if (bf->head_length < MIN_HEAD_LENGTH) { bf->head_length = 0;}
   if (bf->tail_length < MIN_TAIL_LENGTH) { bf->tail_length = 0;}
 
@@ -385,11 +383,9 @@ void mp_calculate_trapezoid(mp_buffer_t *bf) {
       // initialize from previous iteration
       bf->cruise_velocity = computed_velocity;
       bf->head_length =
-        mp_get_target_length(bf->entry_velocity, bf->cruise_velocity,
-                             bf->recip_jerk);
+        mp_get_target_length(bf->entry_velocity, bf->cruise_velocity, bf->jerk);
       bf->tail_length =
-        mp_get_target_length(bf->exit_velocity, bf->cruise_velocity,
-                             bf->recip_jerk);
+        mp_get_target_length(bf->exit_velocity, bf->cruise_velocity, bf->jerk);
 
       if (bf->head_length > bf->tail_length) {
         bf->head_length =
@@ -410,8 +406,7 @@ void mp_calculate_trapezoid(mp_buffer_t *bf) {
     // set velocity and clean up any parts that are too short
     bf->cruise_velocity = computed_velocity;
     bf->head_length =
-      mp_get_target_length(bf->entry_velocity, bf->cruise_velocity,
-                           bf->recip_jerk);
+      mp_get_target_length(bf->entry_velocity, bf->cruise_velocity, bf->jerk);
     bf->tail_length = bf->length - bf->head_length;
 
     if (bf->head_length < MIN_HEAD_LENGTH) {
@@ -514,7 +509,6 @@ void mp_print_queue(mp_buffer_t *bf) {
  *   bl->cruise_vmax       - used during forward planning to set cruise velocity
  *   bl->exit_vmax         - used during forward planning to set exit velocity
  *   bl->delta_vmax        - used during forward planning to set exit velocity
- *   bl->recip_jerk        - used during trapezoid generation
  *   bl->cbrt_jerk         - used during trapezoid generation
  *
  * Variables that will be set during processing:
@@ -681,8 +675,8 @@ void mp_queue_push_nonstop(buffer_cb_t cb, uint32_t line) {
  * [2] Cannot assume Vf >= Vi due to rounding errors and use of
  *     PLANNER_VELOCITY_TOLERANCE necessitating the introduction of fabs()
  */
-float mp_get_target_length(float Vi, float Vf, float recip_jerk) {
-  return fabs(Vi - Vf) * sqrt(fabs(Vi - Vf) * recip_jerk);
+float mp_get_target_length(float Vi, float Vf, float jerk) {
+  return fabs(Vi - Vf) * invsqrt(jerk / fabs(Vi - Vf));
 }
 
 
diff --git a/src/plan/planner.h b/src/plan/planner.h
index e205712..0872f5c 100644
--- a/src/plan/planner.h
+++ b/src/plan/planner.h
@@ -85,5 +85,5 @@ void mp_replan_all();
 
 void mp_queue_push_nonstop(buffer_cb_t cb, uint32_t line);
 
-float mp_get_target_length(float Vi, float Vf, float recip_jerk);
+float mp_get_target_length(float Vi, float Vf, float jerk);
 float mp_get_target_velocity(float Vi, float L, const mp_buffer_t *bf);
diff --git a/src/plan/runtime.c b/src/plan/runtime.c
index b406589..333f367 100644
--- a/src/plan/runtime.c
+++ b/src/plan/runtime.c
@@ -197,7 +197,7 @@ stat_t mp_runtime_move_to_target(float target[], float time) {
 
   bool use_error = false;
   if (!fp_ZERO(old_length_sqr)) {
-    float new_time = time * sqrt(new_length_sqr / old_length_sqr);
+    float new_time = time * invsqrt(old_length_sqr / new_length_sqr);
 
     if (EPSILON <= new_time && new_time <= MAX_SEGMENT_TIME) {
       time = new_time;
diff --git a/src/util.c b/src/util.c
index bbcad9e..e5e8341 100644
--- a/src/util.c
+++ b/src/util.c
@@ -34,3 +34,21 @@ float get_axis_vector_length(const float a[], const float b[]) {
               square(a[AXIS_Z] - b[AXIS_Z]) + square(a[AXIS_A] - b[AXIS_A]) +
               square(a[AXIS_B] - b[AXIS_B]) + square(a[AXIS_C] - b[AXIS_C]));
 }
+
+
+/// Fast inverse square root originally from Quake III Arena code.  Original
+/// comments left intact.
+/// See: https://en.wikipedia.org/wiki/Fast_inverse_square_root
+float invsqrt(float number) {
+  const float threehalfs = 1.5F;
+
+  float x2 = number * 0.5;
+  float y = number;
+  long i = *(long *)&y;                // evil floating point bit level hacking
+  i = 0x5f3759df - (i >> 1);           // what the fuck?
+  y = *(float *)&i;
+  y = y * (threehalfs - x2 * y * y);   // 1st iteration
+  y = y * (threehalfs - x2 * y * y);   // 2nd iteration, this can be removed
+
+  return y;
+}
diff --git a/src/util.h b/src/util.h
index e2356e8..6367c2a 100644
--- a/src/util.h
+++ b/src/util.h
@@ -52,6 +52,8 @@ inline float min4(float a, float b, float c, float d)
 inline float max4(float a, float b, float c, float d)
 {return max(max(a, b), max(c, d));}
 
+float invsqrt(float number);
+
 // Floating-point utilities
 #define EPSILON 0.00001 // allowable rounding error for floats
 inline bool fp_EQ(float a, float b) {return fabs(a - b) < EPSILON;}