bertos/algo/ramp.c

   1 /*!
   2  * \file
   3  * <!--
   4  * Copyright 2004, 2008 Develer S.r.l. (http://www.develer.com/)
   5  * All Rights Reserved.
   6  * -->
   7  *
   8  * \brief Compute, save and load ramps for stepper motors (implementation)
   9  *
  10  * \version $Id$
  11  *
  12  * \author Simone Zinanni <s.zinanni@develer.com>
  13  * \author Bernardo Innocenti <bernie@develer.com>
  14  * \author Giovanni Bajo <rasky@develer.com>
  15  * \author Daniele Basile <asterix@develer.com>
  16  *
  17  *
  18  * The formula used by the ramp is the following:
  19  *
  20  * <pre>
  21  *            a * b
  22  * f(t) = -------------
  23  *         lerp(a,b,t)
  24  * </pre>
  25  *
  26  * Where <code>a</code> and <code>b</code> are the maximum and minimum speed
  27  * respectively (minimum and maximum wavelength respectively), and <code>lerp</code>
  28  * is a linear interpolation with a factor:
  29  *
  30  * <pre>
  31  * lerp(a,b,t) =  a + t * (b - a)  =  (a * (1 - t)) + (b * t)
  32  * </pre>
  33  *
  34  * <code>t</code> must be in the [0,1] interval. It is easy to see that the
  35  * following holds true:
  36  *
  37  * <pre>
  38  * f(0) = b,   f(1) = a
  39  * </pre>
  40  *
  41  * And that the function is monotonic. So, the function effectively interpolates
  42  * between the maximum and minimum speed through its domain ([0,1] -> [b,a]).
  43  *
  44  * The curve drawn by this function is similar to 1 / (sqrt(n)), so it is slower
  45  * than a linear acceleration (which would be 1/n).
  46  *
  47  * The floating point version uses a slightly modified function which accepts
  48  * the parameter in the domain [0, MT] (where MT is maxTime, the length of the
  49  * ramp, which is a setup parameter for the ramp). This is done to reduce the
  50  * number of operations per step. The formula looks like this:
  51  *
  52  * <pre>
  53  *               a * b * MT
  54  * g(t) = ----------------------------
  55  *           (a * MT) + t * (b - a)
  56  * </pre>
  57  *
   58  * It can be shown that <code>g(t) = f(t / MT)</code>. The denominator
   59  * is a linear interpolation in the range [a*MT, b*MT], as t moves in the
  60  * interval [0, MT]. So the interpolation interval of the function is again
  61  * [b, a]. The implementation caches the value of the numerator and parts
  62  * of the denominator, so that the formula becomes:
  63  *
  64  * <pre>
  65  * alpha = a * b * MT
  66  * beta = a * MT
  67  * gamma = b - a
  68  *
  69  *                alpha
  70  * g(t) = ----------------------
  71  *           beta + t * gamma
  72  * </pre>
  73  *
  74  * and <code>t</code> is exactly the parameter that ramp_evaluate() gets,
  75  * that is the current time (in range [0, MT]). The operations performed
  76  * for each step are just an addition, a multiplication and a division.
  77  *
  78  * The fixed point version of the formula instead transforms the original
  79  * function as follows:
  80  *
  81  * <pre>
  82  *                   a * b                         a
  83  *  f(t) =  -------------------------  =  --------------------
  84  *                 a                         a
  85  *           b * ( - * (1 - t) + t )         - * (1 - t) + t
  86  *                 b                         b
  87  * </pre>
  88  *
  89  * <code>t</code> must be computed by dividing the current time (24 bit integer)
  90  * by the maximum time (24 bit integer). This is done by precomputing the
  91  * reciprocal of the maximum time as a 0.32 fixed point number, and multiplying
   92  * it by the current time. Multiplication is performed 8 bits at a time by
  93  * FIX_MULT32(), so that we end up with a 0.16 fixed point number for
  94  * <code>t</code> (and <code>1-t</code> is just its twos-complement negation).
  95  * <code>a/b</code> is in the range [0,1] (because a is always less than b,
  96  * being the minimum wavelength), so it is precomputed as a 0.16 fixed point.
  97  * The final step is then computing the denominator and executing the division
  98  * (32 cycles using the 1-step division instruction in the DSP).
  99  *
 100  * The assembly implementation is needed for efficiency, but a C version of it
 101  * can be easily written, in case it is needed in the future.
 102  *
 103  */
 104
 105 #include "ramp.h"
 106 #include <cfg/debug.h>
 107
 108 #include <string.h> // memcpy()
 109
 110 /**
 111  * Multiply \p a and \p b two integer at 32 bit and extract the high 16 bit word.
 112  */
 113 #define FIX_MULT32(a,b)  (((uint64_t)(a)*(uint32_t)(b)) >> 16)
 114
 115 void ramp_compute(struct Ramp *ramp, uint32_t clocksRamp, uint16_t clocksMinWL, uint16_t clocksMaxWL)
 116 {
 117         ASSERT(clocksMaxWL >= clocksMinWL);
 118
 119         // Save values in ramp struct
 120         ramp->clocksRamp = clocksRamp;
 121         ramp->clocksMinWL = clocksMinWL;
 122         ramp->clocksMaxWL = clocksMaxWL;
 123
 124 #if RAMP_USE_FLOATING_POINT
 125         ramp->precalc.gamma = ramp->clocksMaxWL - ramp->clocksMinWL;
 126         ramp->precalc.beta = (float)ramp->clocksMinWL * (float)ramp->clocksRamp;
 127         ramp->precalc.alpha = ramp->precalc.beta * (float)ramp->clocksMaxWL;
 128
 129 #else
 130     ramp->precalc.max_div_min = ((uint32_t)clocksMinWL << 16) / (uint32_t)clocksMaxWL;
 131
 132     /* Calcola 1/total_time in fixed point .32. Assumiamo che la rampa possa al
 133      * massimo avere 25 bit (cioé valore in tick fino a 2^25, che con il
 134      * prescaler=3 sono circa 7 secondi). Inoltre, togliamo qualche bit di precisione
 135      * da destra (secondo quanto specificato in RAMP_CLOCK_SHIFT_PRECISION).
 136      */
 137     ASSERT(ramp->clocksRamp < (1UL << (24 + RAMP_CLOCK_SHIFT_PRECISION)));
 138     ramp->precalc.inv_total_time = 0xFFFFFFFFUL / (ramp->clocksRamp >> RAMP_CLOCK_SHIFT_PRECISION);
 139     ASSERT(ramp->precalc.inv_total_time < 0x1000000UL);
 140
 141 #endif
 142 }
 143
 144
 145 void ramp_setup(struct Ramp* ramp, uint32_t length, uint32_t minFreq, uint32_t maxFreq)
 146 {
 147         uint32_t minWL, maxWL;
 148
 149         minWL = TIME2CLOCKS(FREQ2MICROS(maxFreq));
 150         maxWL = TIME2CLOCKS(FREQ2MICROS(minFreq));
 151
 152         ASSERT2(minWL < 65536UL, "Maximum frequency too high");
 153         ASSERT2(maxWL < 65536UL, "Minimum frequency too high");
 154         ASSERT(maxFreq > minFreq);
 155
 156         ramp_compute(
 157                 ramp,
 158                 TIME2CLOCKS(length),
 159                 TIME2CLOCKS(FREQ2MICROS(maxFreq)),
 160                 TIME2CLOCKS(FREQ2MICROS(minFreq))
 161         );
 162 }
 163
 164 void ramp_default(struct Ramp *ramp)
 165 {
 166         ramp_setup(ramp, RAMP_DEF_TIME, RAMP_DEF_MINFREQ, RAMP_DEF_MAXFREQ);
 167 }
 168
 169 #if RAMP_USE_FLOATING_POINT
 170
 171 float ramp_evaluate(const struct Ramp* ramp, float curClock)
 172 {
 173         return ramp->precalc.alpha / (curClock * ramp->precalc.gamma + ramp->precalc.beta);
 174 }
 175
 176 #else
 177
 178 INLINE uint32_t fix_mult32(uint32_t m1, uint32_t m2)
 179 {
 180         uint32_t accum = 0;
 181         accum += m1 * ((m2 >> 0) & 0xFF);
 182         accum >>= 8;
 183         accum += m1 * ((m2 >> 8) & 0xFF);
 184         accum >>= 8;
 185         accum += m1 * ((m2 >> 16) & 0xFF);
 186         return accum;
 187 }
 188
 189 //   a*b >> 16
 190 INLINE uint16_t fix_mult16(uint16_t a, uint32_t b)
 191 {
 192         return (b*(uint32_t)a) >> 16;
 193 }
 194
 195 uint16_t FAST_FUNC ramp_evaluate(const struct Ramp* ramp, uint32_t curClock)
 196 {
 197         uint16_t t = FIX_MULT32(curClock >> RAMP_CLOCK_SHIFT_PRECISION, ramp->precalc.inv_total_time);
 198         uint16_t denom =  fix_mult16((uint16_t)~t + 1, ramp->precalc.max_div_min) + t;
 199         uint16_t cur_delta = ((uint32_t)ramp->clocksMinWL << 16) / denom;
 200
 201         return cur_delta;
 202 }
 203
 204 #endif
 205
 206