费解!!!调用pthread_exit发生crash
各位老司机,技术大牛,大高玩,大家好:
小弟第一次发帖,新手一枚。遇到一个比较费解的问题,想请教一下各位。
环境:Switch运行在Linux上
描述:main 函数会创建一个名为 memscan 的线程;主线程在循环中不断调用 soc_mem_scan_start / soc_mem_scan_stop 来启动和停止该线程。运行一段时间后,memscan 线程在退出时(经 sal_thread_exit 调用 pthread_exit)发生段错误(crash)。
bt如下:
(gdb) bt
#0 0x022de0c8 in _Unwind_IteratePhdrCallback ()
#1 0x02320394 in dl_iterate_phdr ()
#2 0x022deafc in _Unwind_Find_FDE ()
#3 0x022dab84 in uw_frame_state_for ()
#4 0x022db89c in _Unwind_ForcedUnwind_Phase2 ()
#5 0x022dc254 in _Unwind_ForcedUnwind ()
#6 0x022b513c in __pthread_unwind ()
#7 0x022b2a6c in pthread_exit ()
#8 0x022382f8 in sal_thread_exit ()
#9 0x013ea454 in _soc_mem_scan_thread ()
#10 0x022383f0 in thread_boot ()
#11 0x022b1b98 in start_thread ()
#12 0x02318574 in __thread_start ()
Backtrace stopped: frame did not save the PC
(gdb)
Main函数调用以下接口去start memscan线程:
/*
 * Start (or restart) the memory-scan thread for a unit.
 *
 * unit     - unit number; used to look up the soc_control_t and passed to
 *            the new thread as its argument.
 * rate     - value stored in soc->mem_scan_rate.
 * interval - value stored in soc->mem_scan_interval; when 0 no thread is
 *            created and the function returns right after updating the
 *            settings.
 *
 * Returns SOC_E_NONE on success (including the interval == 0 case) and
 * SOC_E_MEMORY when sal_thread_create() reports SAL_THREAD_ERROR.
 * SOC_IF_ERROR_RETURN presumably propagates a failure of
 * soc_mem_scan_stop() to the caller -- confirm against the macro.
 */
int
soc_mem_scan_start(int unit, int rate, sal_usecs_t interval)
{
    soc_control_t *soc = SOC_CONTROL(unit);
    int pri;

    /* A scan thread is already recorded for this unit: stop it first. */
    if (soc->mem_scan_pid != SAL_THREAD_ERROR) {
        SOC_IF_ERROR_RETURN(soc_mem_scan_stop(unit));
    }

    sal_snprintf(soc->mem_scan_name, sizeof (soc->mem_scan_name),
                 "bcmMEM_SCAN.%d", unit);
    soc->mem_scan_rate = rate;
    soc->mem_scan_interval = interval;

    if (interval == 0) {
        return SOC_E_NONE;
    }

    if (soc->mem_scan_pid == SAL_THREAD_ERROR) {
        pri = soc_property_get(unit, spn_MEM_SCAN_THREAD_PRI, 50);
        /* Create mem_scan thread here */
        soc->mem_scan_pid = sal_thread_create(soc->mem_scan_name,
                                              SAL_THREAD_STKSZ,
                                              pri,
                                              _soc_mem_scan_thread,
                                              INT_TO_PTR(unit));
        if (soc->mem_scan_pid == SAL_THREAD_ERROR) {
            return SOC_E_MEMORY;
        }
    }

    /*
     * Common success exit. Keeping this outside the 'if' above gives every
     * path a return value and lets the function body close properly.
     */
    return SOC_E_NONE;
}
Main函数调用以下接口去stop memscan线程:
/*
 * Ask the memory-scan thread of a unit to exit and wait until it has
 * cleared soc->mem_scan_pid.
 *
 * unit - unit number; used to look up the soc_control_t.
 *
 * Returns SOC_E_NONE when no thread is recorded or once mem_scan_pid has
 * become SAL_THREAD_ERROR, and SOC_E_INTERNAL when soc_timeout_check()
 * fires first.
 *
 * NOTE(review): the wait loop polls mem_scan_pid back-to-back with no
 * sleep or yield between checks.
 * NOTE(review): the loop only observes the mem_scan_pid field; it does not
 * join the thread. Whether the thread has fully terminated by the time
 * this function returns cannot be told from here -- confirm.
 */
int
soc_mem_scan_stop(int unit)
{
    soc_control_t *soc = SOC_CONTROL(unit);
    soc_timeout_t to;

    /* Zero interval is the exit request read by the scan thread. */
    soc->mem_scan_interval = 0;

    /* Nothing recorded as running: nothing to wait for. */
    if (soc->mem_scan_pid == SAL_THREAD_ERROR) {
        return SOC_E_NONE;
    }

    /* Wake the thread so that it re-checks the exit request. */
    sal_sem_give(soc->mem_scan_notify);

    /* Timeout budget; presumably TOSEC seconds in microseconds -- confirm. */
    soc_timeout_init(&to, TOSEC * 1000000, 0);

    for (;;) {
        if (soc->mem_scan_pid == SAL_THREAD_ERROR) {
            return SOC_E_NONE;
        }
        /* Thread did not clear mem_scan_pid in time. */
        if (soc_timeout_check(&to)) {
            return SOC_E_INTERNAL;
        }
    }
}
Main线程类似下面:
/*
 * Stress driver: start and then stop the memory-scan thread on unit 0 in
 * an endless loop, leaving the loop as soon as either call returns a
 * non-zero (error) status.
 *
 * Returns a non-zero exit status, because the loop is only ever left
 * after a failed start or stop.
 */
int
main(void)
{
    int unit = 0;
    int rate = 8192, interval = 1000;

    while (1) {
        if (soc_mem_scan_start(unit, rate, interval)) {
            break;
        }
        if (soc_mem_scan_stop(unit)) {
            break;
        }
    }

    /* Reached only on failure; report it through the exit status. */
    return 1;
}
Memscan线程类似下面:
/*
 * Entry point of the memory-scan thread (passed to sal_thread_create() by
 * soc_mem_scan_start()). Parts of the body are elided in this excerpt.
 *
 * unit_vp - thread argument; soc_mem_scan_start() passes INT_TO_PTR(unit).
 */
STATIC void
_soc_mem_scan_thread(void *unit_vp)
{
...
/*
 * Keep running while mem_scan_interval is non-zero; soc_mem_scan_stop()
 * writes 0 to that field to request exit.
 */
while ((interval = soc->mem_scan_interval) != 0) {
/*
 * The field is read a second time here, so it may differ from the value
 * captured in 'interval' if another thread changed it in between.
 */
if (soc->mem_scan_interval != 0) {
/* Wait on the notify semaphore; 'interval' is presumably the timeout. */
sal_sem_take(soc->mem_scan_notify, interval);
entries_interval = 0;
}
......
}
cleanup_exit:
/*
 * mem_scan_pid is reset BEFORE sal_thread_exit() is called, and
 * soc_mem_scan_stop() polls exactly this field.
 * NOTE(review): the stopper can therefore return, and soc_mem_scan_start()
 * can create a new thread, while this thread is still inside
 * sal_thread_exit()/pthread_exit(). The reported backtrace crashes in the
 * forced unwind run by pthread_exit(). Possible cause -- confirm.
 */
soc->mem_scan_pid = SAL_THREAD_ERROR;
sal_thread_exit(0); <----Segfault happens here.
}
PS:这个问题很难复现,目前唯一的线索就是上面的 backtrace。从代码上看,暂时没有发现明显的问题。