Skip to content

Commit b4705cf

Browse files
committed
silk_LBRR_encode_Fxx: Move stack alloc to ALLOC()
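Move the silk_nsq_state copies in silk_encode_frame_Fxx and silk_LBRR_encode_Fxx, and the 1275-byte ec_buf_copy byte buffer in the float encoder, from fixed-size stack locals to ALLOC() to reduce main stack use.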
1 parent 85a2fb1 commit b4705cf

File tree

2 files changed: +33 -18 lines changed

2 files changed: +33 -18 lines changed

silk/fixed/encode_frame_FIX.c

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ opus_int silk_encode_frame_FIX(
9595
opus_int i, iter, maxIter, found_upper, found_lower, ret = 0;
9696
opus_int16 *x_frame;
9797
ec_enc sRangeEnc_copy, sRangeEnc_copy2;
98-
silk_nsq_state sNSQ_copy, sNSQ_copy2;
98+
VARDECL(silk_nsq_state, sNSQ_copy);
9999
opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
100100
opus_int32 gainsID, gainsID_lower, gainsID_upper;
101101
opus_int16 gainMult_Q8;
@@ -108,6 +108,8 @@ opus_int silk_encode_frame_FIX(
108108
opus_int bits_margin;
109109
SAVE_STACK;
110110

111+
ALLOC(sNSQ_copy, 2, silk_nsq_state);
112+
111113
/* For CBR, 5 bits below budget is close enough. For VBR, allow up to 25% below the cap if we initially busted the budget. */
112114
bits_margin = useCBR ? 5 : maxBits/4;
113115
/* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
@@ -177,7 +179,7 @@ opus_int silk_encode_frame_FIX(
177179
gainsID_upper = -1;
178180
/* Copy part of the input state */
179181
silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
180-
silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
182+
silk_memcpy( &sNSQ_copy[0], &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
181183
seed_copy = psEnc->sCmn.indices.Seed;
182184
ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
183185
ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
@@ -191,7 +193,7 @@ opus_int silk_encode_frame_FIX(
191193
/* Restore part of the input state */
192194
if( iter > 0 ) {
193195
silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
194-
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
196+
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy[0], sizeof( silk_nsq_state ) );
195197
psEnc->sCmn.indices.Seed = seed_copy;
196198
psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
197199
psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
@@ -267,7 +269,7 @@ opus_int silk_encode_frame_FIX(
267269
silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
268270
celt_assert( sRangeEnc_copy2.offs <= 1275 );
269271
silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
270-
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
272+
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy[1], sizeof( silk_nsq_state ) );
271273
psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
272274
}
273275
break;
@@ -295,7 +297,7 @@ opus_int silk_encode_frame_FIX(
295297
silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
296298
celt_assert( psRangeEnc->offs <= 1275 );
297299
silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
298-
silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
300+
silk_memcpy( &sNSQ_copy[1], &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
299301
LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
300302
}
301303
} else {
@@ -394,16 +396,18 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
394396
{
395397
opus_int32 TempGains_Q16[ MAX_NB_SUBFR ];
396398
SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
397-
silk_nsq_state sNSQ_LBRR;
399+
VARDECL(silk_nsq_state, sNSQ_LBRR);
400+
SAVE_STACK;
398401

402+
ALLOC(sNSQ_LBRR, 1, silk_nsq_state);
399403
/*******************************************/
400404
/* Control use of inband LBRR */
401405
/*******************************************/
402406
if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
403407
psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
404408

405409
/* Copy noise shaping quantizer state and quantization indices from regular encoding */
406-
silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
410+
silk_memcpy( &sNSQ_LBRR[0], &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
407411
silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
408412

409413
/* Save original gains */
@@ -427,12 +431,12 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
427431
/* Noise shaping quantization */
428432
/*****************************************/
429433
if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
430-
silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16,
434+
silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR[0], psIndices_LBRR, x16,
431435
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
432436
psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
433437
psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
434438
} else {
435-
silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16,
439+
silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR[0], psIndices_LBRR, x16,
436440
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
437441
psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
438442
psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
@@ -441,4 +445,5 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
441445
/* Restore original gains */
442446
silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
443447
}
448+
RESTORE_STACK;
444449
}

silk/float/encode_frame_FLP.c

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
3232
#include <stdlib.h>
3333
#include "main_FLP.h"
3434
#include "tuning_parameters.h"
35+
#include "stack_alloc.h"
3536

3637
/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
3738
static OPUS_INLINE void silk_LBRR_encode_FLP(
@@ -95,19 +96,21 @@ opus_int silk_encode_frame_FLP(
9596
silk_float *x_frame, *res_pitch_frame;
9697
silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
9798
ec_enc sRangeEnc_copy, sRangeEnc_copy2;
98-
silk_nsq_state sNSQ_copy, sNSQ_copy2;
99+
VARDECL(silk_nsq_state, sNSQ_copy);
99100
opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
100101
opus_int32 gainsID, gainsID_lower, gainsID_upper;
101102
opus_int16 gainMult_Q8;
102103
opus_int16 ec_prevLagIndex_copy;
103104
opus_int ec_prevSignalType_copy;
104105
opus_int8 LastGainIndex_copy2;
105106
opus_int32 pGains_Q16[ MAX_NB_SUBFR ];
106-
opus_uint8 ec_buf_copy[ 1275 ];
107107
opus_int gain_lock[ MAX_NB_SUBFR ] = {0};
108108
opus_int16 best_gain_mult[ MAX_NB_SUBFR ];
109109
opus_int best_sum[ MAX_NB_SUBFR ];
110110
opus_int bits_margin;
111+
SAVE_STACK;
112+
113+
ALLOC(sNSQ_copy, 2, silk_nsq_state);
111114

112115
/* For CBR, 5 bits below budget is close enough. For VBR, allow up to 25% below the cap if we initially busted the budget. */
113116
bits_margin = useCBR ? 5 : maxBits/4;
@@ -139,6 +142,7 @@ opus_int silk_encode_frame_FLP(
139142
}
140143

141144
if( !psEnc->sCmn.prefillFlag ) {
145+
VARDECL( opus_uint8, ec_buf_copy );
142146
/*****************************************/
143147
/* Find pitch lags, initial LPC analysis */
144148
/*****************************************/
@@ -174,10 +178,11 @@ opus_int silk_encode_frame_FLP(
174178
gainsID_upper = -1;
175179
/* Copy part of the input state */
176180
silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
177-
silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
181+
silk_memcpy( &sNSQ_copy[0], &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
178182
seed_copy = psEnc->sCmn.indices.Seed;
179183
ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
180184
ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
185+
ALLOC( ec_buf_copy, 1275, opus_uint8 );
181186
for( iter = 0; ; iter++ ) {
182187
if( gainsID == gainsID_lower ) {
183188
nBits = nBits_lower;
@@ -187,7 +192,7 @@ opus_int silk_encode_frame_FLP(
187192
/* Restore part of the input state */
188193
if( iter > 0 ) {
189194
silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
190-
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
195+
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy[0], sizeof( silk_nsq_state ) );
191196
psEnc->sCmn.indices.Seed = seed_copy;
192197
psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
193198
psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
@@ -253,7 +258,7 @@ opus_int silk_encode_frame_FLP(
253258
silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
254259
celt_assert( sRangeEnc_copy2.offs <= 1275 );
255260
silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
256-
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
261+
silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy[1], sizeof( silk_nsq_state ) );
257262
psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
258263
}
259264
break;
@@ -283,7 +288,7 @@ opus_int silk_encode_frame_FLP(
283288
silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
284289
celt_assert( psRangeEnc->offs <= 1275 );
285290
silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
286-
silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
291+
silk_memcpy( &sNSQ_copy[1], &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
287292
LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
288293
}
289294
} else {
@@ -358,6 +363,7 @@ opus_int silk_encode_frame_FLP(
358363
if( psEnc->sCmn.prefillFlag ) {
359364
/* No payload */
360365
*pnBytesOut = 0;
366+
RESTORE_STACK;
361367
return ret;
362368
}
363369

@@ -372,6 +378,7 @@ opus_int silk_encode_frame_FLP(
372378
/* Payload size */
373379
*pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
374380

381+
RESTORE_STACK;
375382
return ret;
376383
}
377384

@@ -387,16 +394,18 @@ static OPUS_INLINE void silk_LBRR_encode_FLP(
387394
opus_int32 Gains_Q16[ MAX_NB_SUBFR ];
388395
silk_float TempGains[ MAX_NB_SUBFR ];
389396
SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
390-
silk_nsq_state sNSQ_LBRR;
397+
VARDECL(silk_nsq_state, sNSQ_LBRR);
398+
SAVE_STACK;
391399

400+
ALLOC(sNSQ_LBRR, 1, silk_nsq_state);
392401
/*******************************************/
393402
/* Control use of inband LBRR */
394403
/*******************************************/
395404
if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
396405
psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
397406

398407
/* Copy noise shaping quantizer state and quantization indices from regular encoding */
399-
silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
408+
silk_memcpy( &sNSQ_LBRR[0], &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
400409
silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
401410

402411
/* Save original gains */
@@ -423,10 +432,11 @@ static OPUS_INLINE void silk_LBRR_encode_FLP(
423432
/*****************************************/
424433
/* Noise shaping quantization */
425434
/*****************************************/
426-
silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
435+
silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR[0],
427436
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
428437

429438
/* Restore original gains */
430439
silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
431440
}
441+
RESTORE_STACK;
432442
}

0 commit comments

Comments
 (0)