|
| 1 | +/* |
| 2 | + * |
| 3 | + * [email protected], 31. October, 2025. |
| 4 | + * switch_jb_deadlock.c -- Deadlock test for jitter buffer channel mutex ordering |
| 5 | + * |
| 6 | + * This test reproduces the deadlock between two threads: |
| 7 | + * Thread 1: switch_channel_event_set_data() -> switch_jb_get_frames() |
| 8 | + * Locks: channel mutex -> jb mutex |
| 9 | + * Thread 2: switch_jb_put_packet() -> new_node() -> switch_jb_reset() |
| 10 | + * Locks: jb mutex -> channel mutex (via switch_channel_set_variable_printf) |
| 11 | + * |
| 12 | + * The deadlock occurs when: |
| 13 | + * - Thread 1 holds channel mutex, wants jb mutex (in switch_jb_get_frames at line 1271) |
| 14 | + * - Thread 2 holds jb mutex, wants channel mutex (in switch_jb_reset at line 1178) |
| 15 | + */ |
| 16 | + |
| 17 | +#include <switch.h> |
| 18 | +#include <test/switch_test.h> |
| 19 | +#include <pthread.h> |
| 20 | +#include <stdlib.h> |
| 21 | +#include <time.h> |
| 22 | + |
| 23 | +#define MAX_TEST_THREADS 2 |
| 24 | +#define TEST_DURATION_SEC 10 |
| 25 | +#define PACKET_INJECTION_RATE_MS 1 /* Inject as fast as possible */ |
| 26 | + |
| 27 | +static volatile int should_stop_threads = 0; |
| 28 | +static volatile int deadlock_detected = 0; |
| 29 | +static volatile int thread1_iterations = 0; |
| 30 | +static volatile int thread2_iterations = 0; |
| 31 | +static switch_core_session_t *test_session = NULL; |
| 32 | +static switch_jb_t *test_jb = NULL; |
| 33 | +static switch_channel_t *test_channel = NULL; |
| 34 | + |
| 35 | +/* Thread 1: Simulates media bug destroy path from GDB backtrace |
| 36 | + * Calls switch_channel_event_set_data() which: |
| 37 | + * 1. Locks channel->profile_mutex |
| 38 | + * 2. Calls switch_channel_event_set_extended_data() |
| 39 | + * 3. Which calls switch_core_media_get_stats() |
| 40 | + * 4. Which calls switch_rtp_get_stats() -> switch_jb_get_frames() |
| 41 | + * 5. Which tries to lock jb->mutex |
| 42 | + * Lock ordering: channel mutex -> jb mutex |
| 43 | + */ |
| 44 | +static void *event_data_thread(void *arg) |
| 45 | +{ |
| 46 | + int iterations = 0; |
| 47 | + switch_event_t *event = NULL; |
| 48 | + |
| 49 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 50 | + "Thread 1 (event_data): Started - calling switch_channel_event_set_data()\n"); |
| 51 | + |
| 52 | + while (!should_stop_threads && iterations < 10000) { |
| 53 | + if (test_channel) { |
| 54 | + /* Create event and call switch_channel_event_set_data() */ |
| 55 | + if (switch_event_create(&event, SWITCH_EVENT_CHANNEL_DESTROY) == SWITCH_STATUS_SUCCESS) { |
| 56 | + /* THIS locks channel mutex, THEN tries to access jb stats */ |
| 57 | + switch_channel_event_set_data(test_channel, event); |
| 58 | + switch_event_destroy(&event); |
| 59 | + |
| 60 | + iterations++; |
| 61 | + __sync_add_and_fetch(&thread1_iterations, 1); |
| 62 | + |
| 63 | + if (iterations % 1000 == 0) { |
| 64 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 65 | + "Thread 1: %d iterations completed\n", iterations); |
| 66 | + } |
| 67 | + } |
| 68 | + } |
| 69 | + |
| 70 | + /* Minimal delay to maximize collision probability */ |
| 71 | + switch_yield(100); |
| 72 | + } |
| 73 | + |
| 74 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 75 | + "Thread 1 (event_data): Completed %d iterations\n", iterations); |
| 76 | + return NULL; |
| 77 | +} |
| 78 | + |
| 79 | +/* Thread 2: Simulates RTP packet processing path |
| 80 | + * Feeds packets into jitter buffer to trigger the allocation limit |
| 81 | + * and force switch_jb_reset() to be called from new_node() |
| 82 | + * This locks: jb mutex -> channel mutex (in switch_jb_reset) |
| 83 | + */ |
| 84 | +static void *packet_injection_thread(void *arg) |
| 85 | +{ |
| 86 | + int iterations = 0; |
| 87 | + uint16_t seq_num = 1000; |
| 88 | + uint32_t timestamp = 160000; |
| 89 | + |
| 90 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 91 | + "Thread 2 (packet_injection): Started - simulating RTP packet processing\n"); |
| 92 | + |
| 93 | + while (!should_stop_threads && iterations < 5000) { |
| 94 | + if (test_jb) { |
| 95 | + switch_rtp_packet_t packet; |
| 96 | + switch_size_t packet_len = sizeof(packet.header) + 160; /* Typical audio packet */ |
| 97 | + |
| 98 | + memset(&packet, 0, sizeof(packet)); |
| 99 | + packet.header.version = 2; |
| 100 | + |
| 101 | + /* Use DIFFERENT sequence numbers with BIG GAPS to force node accumulation |
| 102 | + * Each packet gets a seq number far apart so they don't get consumed by jitter buffer */ |
| 103 | + packet.header.seq = htons(seq_num); |
| 104 | + seq_num += 100; /* Big gaps prevent jitter buffer from reading/consuming packets */ |
| 105 | + |
| 106 | + packet.header.ts = htonl(timestamp); |
| 107 | + timestamp += 160 * 100; |
| 108 | + |
| 109 | + /* Feed packets rapidly to force allocation limit and trigger reset */ |
| 110 | + switch_jb_put_packet(test_jb, &packet, packet_len); |
| 111 | + |
| 112 | + iterations++; |
| 113 | + __sync_add_and_fetch(&thread2_iterations, 1); |
| 114 | + |
| 115 | + if (iterations % 500 == 0) { |
| 116 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, |
| 117 | + "Thread 2: %d packets injected, seq=%u\n", iterations, ntohs(packet.header.seq)); |
| 118 | + } |
| 119 | + } |
| 120 | + } |
| 121 | + |
| 122 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 123 | + "Thread 2 (packet_injection): Completed %d iterations\n", iterations); |
| 124 | + return NULL; |
| 125 | +} |
| 126 | + |
| 127 | +/* Watchdog thread to detect deadlock */ |
| 128 | +static void *watchdog_thread(void *arg) |
| 129 | +{ |
| 130 | + int check_count = 0; |
| 131 | + int prev_thread1_count = 0; |
| 132 | + int prev_thread2_count = 0; |
| 133 | + int stall_count = 0; |
| 134 | + int current_thread1 = 0; |
| 135 | + int current_thread2 = 0; |
| 136 | + int thread1_should_run = 0; |
| 137 | + int thread2_should_run = 0; |
| 138 | + |
| 139 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 140 | + "Watchdog: Started - monitoring for deadlock\n"); |
| 141 | + |
| 142 | + while (!should_stop_threads && check_count < TEST_DURATION_SEC * 2) { |
| 143 | + switch_sleep(500000); /* Check every 500ms */ |
| 144 | + |
| 145 | + current_thread1 = __sync_add_and_fetch(&thread1_iterations, 0); |
| 146 | + current_thread2 = __sync_add_and_fetch(&thread2_iterations, 0); |
| 147 | + |
| 148 | + /* Check if both threads are stalled AND still expected to be running |
| 149 | + * Thread 1 limit: 10000, Thread 2 limit: 5000 */ |
| 150 | + thread1_should_run = (current_thread1 < 10000); |
| 151 | + thread2_should_run = (current_thread2 < 5000); |
| 152 | + |
| 153 | + if (current_thread1 == prev_thread1_count && current_thread2 == prev_thread2_count) { |
| 154 | + /* Only count as stall if at least one thread should still be running */ |
| 155 | + if (thread1_should_run || thread2_should_run) { |
| 156 | + stall_count++; |
| 157 | + |
| 158 | + if (stall_count >= 4) { /* Both threads stalled for 2 seconds */ |
| 159 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, |
| 160 | + "DEADLOCK DETECTED! Thread 1: %d iterations (stalled, limit: 10000), Thread 2: %d iterations (stalled, limit: 5000)\n", |
| 161 | + current_thread1, current_thread2); |
| 162 | + deadlock_detected = 1; |
| 163 | + should_stop_threads = 1; |
| 164 | + break; |
| 165 | + } |
| 166 | + } else { |
| 167 | + /* Both threads completed their work normally */ |
| 168 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 169 | + "Watchdog: Both threads completed their iteration limits\n"); |
| 170 | + should_stop_threads = 1; |
| 171 | + break; |
| 172 | + } |
| 173 | + } else { |
| 174 | + stall_count = 0; |
| 175 | + } |
| 176 | + |
| 177 | + prev_thread1_count = current_thread1; |
| 178 | + prev_thread2_count = current_thread2; |
| 179 | + check_count++; |
| 180 | + } |
| 181 | + |
| 182 | + if (!deadlock_detected) { |
| 183 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 184 | + "Watchdog: No deadlock detected - threads completed normally\n"); |
| 185 | + } |
| 186 | + |
| 187 | + return NULL; |
| 188 | +} |
| 189 | + |
| 190 | +FST_CORE_BEGIN("./conf") |
| 191 | +{ |
| 192 | + FST_SUITE_BEGIN(switch_jb_deadlock) |
| 193 | + { |
| 194 | + FST_SETUP_BEGIN() |
| 195 | + { |
| 196 | + } |
| 197 | + FST_SETUP_END() |
| 198 | + |
| 199 | + FST_TEARDOWN_BEGIN() |
| 200 | + { |
| 201 | + should_stop_threads = 1; |
| 202 | + |
| 203 | + /* Clean up - jitter buffer is owned by RTP session, so don't destroy it separately */ |
| 204 | + test_jb = NULL; |
| 205 | + test_channel = NULL; |
| 206 | + |
| 207 | + if (test_session) { |
| 208 | + switch_channel_hangup(switch_core_session_get_channel(test_session), |
| 209 | + SWITCH_CAUSE_NORMAL_CLEARING); |
| 210 | + switch_core_session_rwunlock(test_session); |
| 211 | + test_session = NULL; |
| 212 | + } |
| 213 | + } |
| 214 | + FST_TEARDOWN_END() |
| 215 | + |
| 216 | + FST_SESSION_BEGIN(jb_channel_mutex_deadlock_test) |
| 217 | + { |
| 218 | + pthread_t threads[3]; |
| 219 | + switch_call_cause_t cause; |
| 220 | + switch_status_t status; |
| 221 | + int i; |
| 222 | + switch_memory_pool_t *pool = NULL; |
| 223 | + switch_rtp_t *rtp_session = NULL; |
| 224 | + switch_rtp_flag_t flags[SWITCH_RTP_FLAG_INVALID] = {0}; |
| 225 | + const char *err = NULL; |
| 226 | + switch_media_handle_t *media_handle = NULL; |
| 227 | + switch_core_media_params_t *mparams = NULL; |
| 228 | + |
| 229 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, |
| 230 | + "=== JITTER BUFFER CHANNEL MUTEX DEADLOCK TEST ===\n"); |
| 231 | + |
| 232 | + should_stop_threads = 0; |
| 233 | + deadlock_detected = 0; |
| 234 | + thread1_iterations = 0; |
| 235 | + thread2_iterations = 0; |
| 236 | + |
| 237 | + /* Create a test session with null endpoint (designed for testing) */ |
| 238 | + status = switch_ivr_originate(NULL, &test_session, &cause, |
| 239 | + "null/+15553334444", 2, NULL, NULL, NULL, NULL, NULL, |
| 240 | + SOF_NONE, NULL, NULL); |
| 241 | + |
| 242 | + if (status != SWITCH_STATUS_SUCCESS || !test_session) { |
| 243 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, |
| 244 | + "Failed to create test session, cause: %d\n", cause); |
| 245 | + fst_check(0); |
| 246 | + goto test_end; |
| 247 | + } |
| 248 | + |
| 249 | + test_channel = switch_core_session_get_channel(test_session); |
| 250 | + fst_requires(test_channel != NULL); |
| 251 | + |
| 252 | + /* Create media handle so switch_core_media_get_stats() actually works! */ |
| 253 | + pool = switch_core_session_get_pool(test_session); |
| 254 | + |
| 255 | + mparams = switch_core_session_alloc(test_session, sizeof(switch_core_media_params_t)); |
| 256 | + mparams->num_codecs = 1; |
| 257 | + mparams->inbound_codec_string = switch_core_session_strdup(test_session, "PCMU"); |
| 258 | + mparams->outbound_codec_string = switch_core_session_strdup(test_session, "PCMU"); |
| 259 | + mparams->rtpip = switch_core_session_strdup(test_session, "127.0.0.1"); |
| 260 | + |
| 261 | + status = switch_media_handle_create(&media_handle, test_session, mparams); |
| 262 | + if (status != SWITCH_STATUS_SUCCESS) { |
| 263 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, |
| 264 | + "Failed to create media handle\n"); |
| 265 | + fst_check(0); |
| 266 | + goto test_end; |
| 267 | + } |
| 268 | + |
| 269 | + /* Create RTP session with jitter buffer */ |
| 270 | + |
| 271 | + rtp_session = switch_rtp_new("127.0.0.1", 12340, "127.0.0.1", 12342, |
| 272 | + 0, 8000, 20 * 1000, flags, "soft", &err, pool); |
| 273 | + |
| 274 | + if (!rtp_session) { |
| 275 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, |
| 276 | + "Failed to create RTP session: %s\n", err ? err : "unknown"); |
| 277 | + fst_check(0); |
| 278 | + goto test_end; |
| 279 | + } |
| 280 | + |
| 281 | + /* Activate jitter buffer with LOW limits to trigger reset quickly */ |
| 282 | + status = switch_rtp_activate_jitter_buffer(rtp_session, 3, 5, 80, 8000); |
| 283 | + if (status != SWITCH_STATUS_SUCCESS) { |
| 284 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, |
| 285 | + "Failed to activate jitter buffer\n"); |
| 286 | + switch_rtp_destroy(&rtp_session); |
| 287 | + fst_check(0); |
| 288 | + goto test_end; |
| 289 | + } |
| 290 | + |
| 291 | + /* Get the jitter buffer from RTP session */ |
| 292 | + test_jb = switch_rtp_get_jitter_buffer(rtp_session); |
| 293 | + if (!test_jb) { |
| 294 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, |
| 295 | + "Failed to get jitter buffer from RTP session\n"); |
| 296 | + switch_rtp_destroy(&rtp_session); |
| 297 | + fst_check(0); |
| 298 | + goto test_end; |
| 299 | + } |
| 300 | + |
| 301 | + /* Enable jitter buffer debugging to see if reset is triggered (level 2 required) */ |
| 302 | + switch_rtp_debug_jitter_buffer(rtp_session, "2"); |
| 303 | + |
| 304 | + /* Attach RTP session to the test session's media so stats path works */ |
| 305 | + switch_core_media_set_rtp_session(test_session, SWITCH_MEDIA_TYPE_AUDIO, rtp_session); |
| 306 | + |
| 307 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 308 | + "Created session with media_handle, RTP, and jitter buffer (max_frame_len=5)\n"); |
| 309 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 310 | + "Jitter buffer debug enabled - watch for 'ALLOCATED FRAMES TOO HIGH' messages\n"); |
| 311 | + |
| 312 | + /* Start Thread 1: event_data (channel mutex -> jb mutex) */ |
| 313 | + fst_check(pthread_create(&threads[0], NULL, event_data_thread, NULL) == 0); |
| 314 | + |
| 315 | + /* Start Thread 2: packet_injection (jb mutex -> channel mutex) */ |
| 316 | + fst_check(pthread_create(&threads[1], NULL, packet_injection_thread, NULL) == 0); |
| 317 | + |
| 318 | + /* Start Watchdog thread */ |
| 319 | + fst_check(pthread_create(&threads[2], NULL, watchdog_thread, NULL) == 0); |
| 320 | + |
| 321 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, |
| 322 | + "Started 2 racing threads + 1 watchdog thread\n"); |
| 323 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, |
| 324 | + "Running test for %d seconds...\n", TEST_DURATION_SEC); |
| 325 | + |
| 326 | + /* Wait for all threads to complete or deadlock to be detected */ |
| 327 | + for (i = 0; i < 3; i++) { |
| 328 | + pthread_join(threads[i], NULL); |
| 329 | + } |
| 330 | + |
| 331 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, |
| 332 | + "Test completed. Thread 1: %d iterations, Thread 2: %d iterations\n", |
| 333 | + thread1_iterations, thread2_iterations); |
| 334 | + |
| 335 | + if (deadlock_detected) { |
| 336 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, |
| 337 | + "DEADLOCK WAS DETECTED!\n"); |
| 338 | + } else { |
| 339 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, |
| 340 | + "No deadlock detected - either the fix is applied or race didn't trigger\n"); |
| 341 | + } |
| 342 | + |
| 343 | + /* The test "passes" by detecting the deadlock (proving the bug exists) |
| 344 | + * or by completing without deadlock (if the fix is applied) |
| 345 | + * We'll mark it as successful either way since we're demonstrating the issue */ |
| 346 | + fst_check(1); |
| 347 | + |
| 348 | +test_end: |
| 349 | + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, |
| 350 | + "=== TEST END ===\n"); |
| 351 | + } |
| 352 | + FST_SESSION_END() |
| 353 | + } |
| 354 | + FST_SUITE_END() |
| 355 | +} |
| 356 | +FST_CORE_END() |
0 commit comments