Major shaper optimization

Co-Authored-By: tombrazier <68918209+tombrazier@users.noreply.github.com>
This commit is contained in:
Scott Lahteine 2023-03-11 19:22:18 -06:00
parent e1a209d0ce
commit e4b83ad5f5
2 changed files with 51 additions and 25 deletions

View file

@ -1676,7 +1676,7 @@ void Stepper::pulse_phase_isr() {
int32_t de = delta_error[_AXIS(AXIS)] + advance_dividend[_AXIS(AXIS)]; \ int32_t de = delta_error[_AXIS(AXIS)] + advance_dividend[_AXIS(AXIS)]; \
if (de >= 0) { \ if (de >= 0) { \
step_needed.set(_AXIS(AXIS)); \ step_needed.set(_AXIS(AXIS)); \
de -= advance_divisor; \ de -= advance_divisor_cached; \
} \ } \
delta_error[_AXIS(AXIS)] = de; \ delta_error[_AXIS(AXIS)] = de; \
}while(0) }while(0)
@ -1702,19 +1702,22 @@ void Stepper::pulse_phase_isr() {
#define HYSTERESIS(AXIS) _HYSTERESIS(AXIS) #define HYSTERESIS(AXIS) _HYSTERESIS(AXIS)
#define PULSE_PREP_SHAPING(AXIS, DELTA_ERROR, DIVIDEND) do{ \ #define PULSE_PREP_SHAPING(AXIS, DELTA_ERROR, DIVIDEND) do{ \
if (step_needed.test(_AXIS(AXIS))) { \ int16_t de = DELTA_ERROR + (DIVIDEND); \
DELTA_ERROR += (DIVIDEND); \ const bool step_fwd = de >= (64 + HYSTERESIS(AXIS)), \
if ((MAXDIR(AXIS) && DELTA_ERROR <= -(64 + HYSTERESIS(AXIS))) || (MINDIR(AXIS) && DELTA_ERROR >= (64 + HYSTERESIS(AXIS)))) { \ step_bak = de <= -(64 + HYSTERESIS(AXIS)); \
if (step_fwd || step_bak) { \
de += step_fwd ? -128 : 128; \
if ((MAXDIR(AXIS) && step_bak) || (MINDIR(AXIS) && step_fwd)) { \
{ USING_TIMED_PULSE(); START_TIMED_PULSE(); AWAIT_LOW_PULSE(); } \ { USING_TIMED_PULSE(); START_TIMED_PULSE(); AWAIT_LOW_PULSE(); } \
TBI(last_direction_bits, _AXIS(AXIS)); \ TBI(last_direction_bits, _AXIS(AXIS)); \
DIR_WAIT_BEFORE(); \ DIR_WAIT_BEFORE(); \
SET_STEP_DIR(AXIS); \ SET_STEP_DIR(AXIS); \
DIR_WAIT_AFTER(); \ DIR_WAIT_AFTER(); \
} \ } \
step_needed.set(_AXIS(AXIS), DELTA_ERROR <= -(64 + HYSTERESIS(AXIS)) || DELTA_ERROR >= (64 + HYSTERESIS(AXIS))); \
if (step_needed.test(_AXIS(AXIS))) \
DELTA_ERROR += MAXDIR(AXIS) ? -128 : 128; \
} \ } \
else \
step_needed.clear(_AXIS(AXIS)); \
DELTA_ERROR = de; \
}while(0) }while(0)
// Start an active pulse if needed // Start an active pulse if needed
@ -1839,6 +1842,9 @@ void Stepper::pulse_phase_isr() {
#endif // DIRECT_STEPPING #endif // DIRECT_STEPPING
if (!is_page) { if (!is_page) {
// Give the compiler a clue to store advance_divisor in registers for what follows
const uint32_t advance_divisor_cached = advance_divisor;
// Determine if pulses are needed // Determine if pulses are needed
#if HAS_X_STEP #if HAS_X_STEP
PULSE_PREP(X); PULSE_PREP(X);
@ -1883,19 +1889,19 @@ void Stepper::pulse_phase_isr() {
#if HAS_SHAPING #if HAS_SHAPING
// record an echo if a step is needed in the primary bresenham // record an echo if a step is needed in the primary bresenham
const bool x_step = TERN0(INPUT_SHAPING_X, shaping_x.enabled && step_needed.x), const bool x_step = TERN0(INPUT_SHAPING_X, step_needed.x && shaping_x.enabled),
y_step = TERN0(INPUT_SHAPING_Y, shaping_y.enabled && step_needed.y); y_step = TERN0(INPUT_SHAPING_Y, step_needed.y && shaping_y.enabled);
if (x_step || y_step) if (x_step || y_step)
ShapingQueue::enqueue(x_step, TERN0(INPUT_SHAPING_X, shaping_x.forward), y_step, TERN0(INPUT_SHAPING_Y, shaping_y.forward)); ShapingQueue::enqueue(x_step, TERN0(INPUT_SHAPING_X, shaping_x.forward), y_step, TERN0(INPUT_SHAPING_Y, shaping_y.forward));
// do the first part of the secondary bresenham // do the first part of the secondary bresenham
#if ENABLED(INPUT_SHAPING_X) #if ENABLED(INPUT_SHAPING_X)
if (shaping_x.enabled) if (x_step)
PULSE_PREP_SHAPING(X, shaping_x.delta_error, shaping_x.factor1 * (shaping_x.forward ? 1 : -1)); PULSE_PREP_SHAPING(X, shaping_x.delta_error, shaping_x.forward ? shaping_x.factor1 : -shaping_x.factor1);
#endif #endif
#if ENABLED(INPUT_SHAPING_Y) #if ENABLED(INPUT_SHAPING_Y)
if (shaping_y.enabled) if (y_step)
PULSE_PREP_SHAPING(Y, shaping_y.delta_error, shaping_y.factor1 * (shaping_y.forward ? 1 : -1)); PULSE_PREP_SHAPING(Y, shaping_y.delta_error, shaping_y.forward ? shaping_y.factor1 : -shaping_y.factor1);
#endif #endif
#endif #endif
} }
@ -2008,7 +2014,7 @@ void Stepper::pulse_phase_isr() {
#if ENABLED(INPUT_SHAPING_X) #if ENABLED(INPUT_SHAPING_X)
if (step_needed.x) { if (step_needed.x) {
const bool forward = ShapingQueue::dequeue_x(); const bool forward = ShapingQueue::dequeue_x();
PULSE_PREP_SHAPING(X, shaping_x.delta_error, shaping_x.factor2 * (forward ? 1 : -1)); PULSE_PREP_SHAPING(X, shaping_x.delta_error, (forward ? shaping_x.factor2 : -shaping_x.factor2));
PULSE_START(X); PULSE_START(X);
} }
#endif #endif
@ -2016,7 +2022,7 @@ void Stepper::pulse_phase_isr() {
#if ENABLED(INPUT_SHAPING_Y) #if ENABLED(INPUT_SHAPING_Y)
if (step_needed.y) { if (step_needed.y) {
const bool forward = ShapingQueue::dequeue_y(); const bool forward = ShapingQueue::dequeue_y();
PULSE_PREP_SHAPING(Y, shaping_y.delta_error, shaping_y.factor2 * (forward ? 1 : -1)); PULSE_PREP_SHAPING(Y, shaping_y.delta_error, (forward ? shaping_y.factor2 : -shaping_y.factor2));
PULSE_START(Y); PULSE_START(Y);
} }
#endif #endif

View file

@ -392,16 +392,36 @@ constexpr ena_mask_t enable_overlap[] = {
TERN_(INPUT_SHAPING_Y, if (axis == Y_AXIS) delay_y = delay); TERN_(INPUT_SHAPING_Y, if (axis == Y_AXIS) delay_y = delay);
} }
static void enqueue(const bool x_step, const bool x_forward, const bool y_step, const bool y_forward) { static void enqueue(const bool x_step, const bool x_forward, const bool y_step, const bool y_forward) {
TERN_(INPUT_SHAPING_X, if (head_x == tail && x_step) peek_x_val = delay_x); #if ENABLED(INPUT_SHAPING_X)
TERN_(INPUT_SHAPING_Y, if (head_y == tail && y_step) peek_y_val = delay_y); if (x_step) {
if (head_x == tail) peek_x_val = delay_x;
echo_axes[tail].x = x_forward ? ECHO_FWD : ECHO_BWD;
_free_count_x--;
}
else {
echo_axes[tail].x = ECHO_NONE;
if (head_x != tail)
_free_count_x--;
else if (++head_x == shaping_echoes)
head_x = 0;
}
#endif
#if ENABLED(INPUT_SHAPING_Y)
if (y_step) {
if (head_y == tail) peek_y_val = delay_y;
echo_axes[tail].y = y_forward ? ECHO_FWD : ECHO_BWD;
_free_count_y--;
}
else {
echo_axes[tail].y = ECHO_NONE;
if (head_y != tail)
_free_count_y--;
else if (++head_y == shaping_echoes)
head_y = 0;
}
#endif
times[tail] = now; times[tail] = now;
TERN_(INPUT_SHAPING_X, echo_axes[tail].x = x_step ? (x_forward ? ECHO_FWD : ECHO_BWD) : ECHO_NONE);
TERN_(INPUT_SHAPING_Y, echo_axes[tail].y = y_step ? (y_forward ? ECHO_FWD : ECHO_BWD) : ECHO_NONE);
if (++tail == shaping_echoes) tail = 0; if (++tail == shaping_echoes) tail = 0;
TERN_(INPUT_SHAPING_X, _free_count_x--);
TERN_(INPUT_SHAPING_Y, _free_count_y--);
TERN_(INPUT_SHAPING_X, if (echo_axes[head_x].x == ECHO_NONE) dequeue_x());
TERN_(INPUT_SHAPING_Y, if (echo_axes[head_y].y == ECHO_NONE) dequeue_y());
} }
#if ENABLED(INPUT_SHAPING_X) #if ENABLED(INPUT_SHAPING_X)
static shaping_time_t peek_x() { return peek_x_val; } static shaping_time_t peek_x() { return peek_x_val; }
@ -445,11 +465,11 @@ constexpr ena_mask_t enable_overlap[] = {
struct ShapeParams { struct ShapeParams {
float frequency; float frequency;
float zeta; float zeta;
bool enabled; bool enabled : 1;
bool forward : 1;
int16_t delta_error = 0; // delta_error for secondary bresenham mod 128 int16_t delta_error = 0; // delta_error for secondary bresenham mod 128
uint8_t factor1; uint8_t factor1;
uint8_t factor2; uint8_t factor2;
bool forward;
int32_t last_block_end_pos = 0; int32_t last_block_end_pos = 0;
}; };