Service is crashing from time to time and it crashed 13 times today. Please find the bt full traces below
Sig 11:
(gdb) bt full #0 0x00000000006a16fc in timer_list_expire (t=1339518316, h=0x7facee484fd0, slow_l=0x7facee488208, slow_mark=7936) at core/timer.c:846 tl = 0x7facf048e1b8 ret = 0 #1 0x00000000006a1c19 in timer_handler () at core/timer.c:922 saved_ticks = 1339518316 run_slow_timer = 0 i = 768 __FUNCTION__ = "timer_handler" #2 0x00000000006a20ba in timer_main () at core/timer.c:961 No locals. #3 0x000000000042ac0a in main_loop () at main.c:1753 i = 32 pid = 0 si = 0x0 si_desc = "udp receiver child=31 sock=10.161.224.211:5060", '\000' <repeats 11 times>, "\354\325K\346\060\375\350\003\000\000\000\314\000\000\000\365k\000\000\000\000\000\000P\272A\000\000\000\000\000P\343o%\374\177", '\000' <repeats 18 times>, "\320\335o%\374\177\000\000\215\006Z\000\000\000\000" nrprocs = 32 woneinit = 1 __FUNCTION__ = "main_loop" #4 0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856 cfg_stream = 0x2674010 c = -1 r = 0 tmp = 0x7ffc256fff2c "" tmp_len = 32685 port = -1767840736 proto = 32685 ahost = 0x0 aport = 0 options = 0x7e2058 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 1101782887 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x7fad96a1d5f7 p = 0x7fad96a0e77c "free" st = {st_dev = 17, st_ino = 15237, st_nlink = 2, st_mode = 16877, st_uid = 105, st_gid = 112, __pad0 = 0, st_rdev = 0, st_size = 120, st_blksize = 4096, st_blocks = 0, st_atim = { tv_sec = 1607000056, tv_nsec = 100000000}, st_mtim = {tv_sec = 1607002632, tv_nsec = 716000000}, st_ctim = {tv_sec = 1607003887, tv_nsec = 44000000}, __glibc_reserved = {0, 0, 0}} tbuf = "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\210'Ö\255\177\000\000\030\207\241\226\255\177\000\000\000\000\000\000\000\000\000\000\220\340o%\374\177\000\000`\261w%\374\177\000\000\240\341o%\374\177\000\000رw%\374\177\000\000\220\341o%\374\177\000\000\000\000\000\000\000\000\000\000<\177\241\226\255\177\000\000\000\000\000\000\000\000\000\000.N=\366\000\000\000\000\004\000\000\000\000\000\000\000\006\000\000\000\000\000\000\000\250u\302\226\255\177\000\000\266\210\241\226\255\177", '\000' <repeats 11 times>, "\341o%\374\177\000\000\060=\362\225\255\177\000\000\020\342o%\374\177\000\000\370\001\363\225\255\177\000\000\070\365\330\003\000\000\000\000\000\342o%\374\177\000\000\350*Ö"... option_index = 0 long_options = {{name = 0x7e424f "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x7df854 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x7e4254 "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x7e425a "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x7e4260 "substdef", has_arg = 1, flag = 0x0, val = 1026}, { name = 0x7e4269 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x7e4273 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x7e427d "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x7e4288 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x7e4291 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, { name = 0x7e429c "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __FUNCTION__ = "main" (gdb)
Sig 6:
(gdb) bt full #0 0x00007fad95f56c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6 No symbol table info available. #1 0x00007fad95f5a028 in abort () from /lib/x86_64-linux-gnu/libc.so.6 No symbol table info available. #2 0x000000000041e39d in sig_alarm_abort (signo=14) at main.c:687 __FUNCTION__ = "sig_alarm_abort" #3 <signal handler called> No symbol table info available. #4 0x00007fad960183c7 in syscall () from /lib/x86_64-linux-gnu/libc.so.6 No symbol table info available. #5 0x00007fad6f5c024b in futex_get (lock=0x7facee70baa0) at ../../core/mem/../futexlock.h:121 v = 1 i = 0 #6 0x00007fad6f5c2167 in get_peer_by_fqdn (fqdn=0x7faceeb377b0) at peermanager.c:240 i = 0x7f4d8a #7 0x00007fad6f5e6c31 in get_first_connected_route (cdp_session=0x7faceeb37768, r=0x7facee70a220, app_id=16777236, vendor_id=10415) at routing.c:110 peers = {0x7facee9f4870, 0x18, 0x1, 0x0, 0x102ef763ea8, 0xc, 0x0, 0x11b00000000, 0x0, 0x7facef617fb0, 0x7facf0a3f2d0, 0x7facef2b5f90, 0x8, 0x7facee407000, 0x7ffc256fd0d0, 0x7ffc256fd0dc, 0x3007df3d8, 0x7fad6f617680, 0x41b610 syslog@plt, 0x7f4d8a} peer_count = 0 prev_metric = 0 i = 0x1007f4d8a p = 0x7ffc256fcff0 j = 628084656 least_recent_time = 7234707 time_spec = {tv_sec = 140382874676976, tv_nsec = 1569531277302} __FUNCTION__ = "get_first_connected_route" #8 0x00007fad6f5ea8b2 in get_routing_peer (cdp_session=0x7faceeb37768, m=0x7facef617fb0) at routing.c:272 destination_realm = {s = 0x7facef6d38f0 "isbvpcrf001.zte.com", len = 19} destination_host = {s = 0x0, len = 0} avp = 0x7facef2b5f90 avp_vendor = 0x0 avp2 = 0x0 group = {head = 0x0, tail = 0x0} p = 0x0 rr = 0x0 app_id = 16777236 vendor_id = 10415 __FUNCTION__ = "get_routing_peer" #9 0x00007fad6f5ac72d in AAASendMessage (message=0x7facef617fb0, callback_f=0x0, callback_param=0x0) at diameter_comm.c:137 cdp_session = 0x7faceeb37768 p = 0x7facef7fdce0 __FUNCTION__ = "AAASendMessage" #10 0x00007fad6ef05dbb in rx_send_str (rx_session_id=0x7facf0849d50) at rx_str.c:157 __FUNCTION__ = "rx_send_str" auth = 0x7faceeb37768 str = 0x7facef617fb0 avp = 0x7facef2b5f90 x = "\000\000\000\001" #11 0x00007fad6eeb25c4 in callback_dialog (dlg=0x7facf0036d70, type=8192, params=0x7fad72b2a1c0 <params>) at ims_qos_mod.c:453 msg = 0x0 parsed_cseq = 0x7facee4406c8 rx_session_id = 0x7facf0849d50 ---Type <return> to continue, or q <return> to quit--- auth = 0x0 p_session_data = 0x0 current_fd = 0x0 new_fd = 0x0 current_has_video = 0 new_has_video = 0 must_unlock_aaa = 1 __FUNCTION__ = "callback_dialog" #12 0x00007fad728973af in run_dlg_callbacks (type=8192, dlg=0x7facf0036d70, req=0x0, rpl=0x0, dir=0, dlg_data=0x0) at dlg_cb.c:274 cb = 0x7facf0849e68 __FUNCTION__ = "run_dlg_callbacks" #13 0x00007fad728d5315 in destroy_dlg (dlg=0x7facf0036d70) at dlg_hash.c:219 ret = 0 var = 0x7faceea76348 __FUNCTION__ = "destroy_dlg" #14 0x00007fad728d5b1b in destroy_dlg_table () at dlg_hash.c:291 dlg = 0x0 l_dlg = 0x7facf0036d70 i = 120 __FUNCTION__ = "destroy_dlg_table" #15 0x00007fad7290b71e in mod_destroy () at ims_dialog.c:627 No locals. #16 0x0000000000616ae1 in destroy_modules () at core/sr_module.c:746 t = 0x7fad75fcae18 foo = 0x7fad75fca710 __FUNCTION__ = "destroy_modules" #17 0x000000000041ce17 in cleanup (show_status=1) at main.c:563 memlog = 0 __FUNCTION__ = "cleanup" #18 0x000000000041e6ef in shutdown_children (sig=15, show_status=1) at main.c:706 __FUNCTION__ = "shutdown_children" #19 0x0000000000421755 in handle_sigs () at main.c:806 chld = 0 chld_status = 139 any_chld_stopped = 1 memlog = 0 __FUNCTION__ = "handle_sigs" #20 0x000000000042bc85 in main_loop () at main.c:1817 i = 32 pid = 27887 si = 0x0 si_desc = "udp receiver child=31 sock=10.161.224.211:5060", '\000' <repeats 11 times>, "\354\325K\346\060\375\350\003\000\000\000\314\000\000\000\365k\000\000\000\000\000\000P\272A\000\000\000\000\000P\343o%\374\177", '\000' <repeats 18 times>, "\320\335o%\374\177\000\000\215\006Z\000\000\000\000" nrprocs = 32 woneinit = 1 __FUNCTION__ = "main_loop" #21 0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856 cfg_stream = 0x2674010 c = -1 r = 0 tmp = 0x7ffc256fff2c "" tmp_len = 32685 ---Type <return> to continue, or q <return> to quit--- port = -1767840736 proto = 32685 ahost = 0x0 aport = 0 options = 0x7e2058 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 1101782887 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x7fad96a1d5f7 p = 0x7fad96a0e77c "free" st = {st_dev = 17, st_ino = 15237, st_nlink = 2, st_mode = 16877, st_uid = 105, st_gid = 112, __pad0 = 0, st_rdev = 0, st_size = 120, st_blksize = 4096, st_blocks = 0, st_atim = { tv_sec = 1607000056, tv_nsec = 100000000}, st_mtim = {tv_sec = 1607002632, tv_nsec = 716000000}, st_ctim = {tv_sec = 1607003887, tv_nsec = 44000000}, __glibc_reserved = {0, 0, 0}} tbuf = "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\210'Ö\255\177\000\000\030\207\241\226\255\177\000\000\000\000\000\000\000\000\000\000\220\340o%\374\177\000\000`\261w%\374\177\000\000\240\341o%\374\177\000\000رw%\374\177\000\000\220\341o%\374\177\000\000\000\000\000\000\000\000\000\000<\177\241\226\255\177\000\000\000\000\000\000\000\000\000\000.N=\366\000\000\000\000\004\000\000\000\000\000\000\000\006\000\000\000\000\000\000\000\250u\302\226\255\177\000\000\266\210\241\226\255\177", '\000' <repeats 11 times>, "\341o%\374\177\000\000\060=\362\225\255\177\000\000\020\342o%\374\177\000\000\370\001\363\225\255\177\000\000\070\365\330\003\000\000\000\000\000\342o%\374\177\000\000\350*Ö"... option_index = 0 long_options = {{name = 0x7e424f "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x7df854 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x7e4254 "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x7e425a "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x7e4260 "substdef", has_arg = 1, flag = 0x0, val = 1026}, { name = 0x7e4269 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x7e4273 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x7e427d "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x7e4288 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x7e4291 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, { name = 0x7e429c "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __FUNCTION__ = "main" (gdb)
I observed that service crash on Signal 11 and when service starts again it immediately crashes on Sig 6. After that service remains up for some time. Moreover, there are many following errors of udp_send_failed and tcp_read error.
#0 0x00007f668aa94013 in recvfrom () from /lib/x86_64-linux-gnu/libc.so.6 No symbol table info available. #1 0x00000000006638d0 in udp_rcv_loop () at core/udp_server.c:460 len = 638 buf = "SUBSCRIBE tel:+923492324215 SIP/2.0\r\nFrom: tel:+923492324215;tag=2869510412\r\nTo: tel:+923492324215\r\nCSeq: 722026762 SUBSCRIBE\r\nCall-ID: 2869510410_3596082716@10.100.217.241\r\nVia: SIP/2.0/UDP 10.100"... tmp = 0xdf831260 <error: Cannot access memory at address 0xdf831260> from = 0x7f666ac86e78 fromlen = 16
and
#0 0x00007f668aa94013 in recvfrom () from /lib/x86_64-linux-gnu/libc.so.6 No symbol table info available. #1 0x00000000006638d0 in udp_rcv_loop () at core/udp_server.c:460 len = 952 buf = "REGISTER sip:ims.mnc006.mcc410.3gppnetwork.org SIP/2.0\r\nVia: SIP/2.0/UDP 10.100.213.50;rport;branch=z9hG4bKL60mek0i6jXu6V\r\nFrom: sip:410060265715793@ims.mnc006.mcc410.3gppnetwork.org;tag=jYbVBQiAfL1"... tmp = 0xdf831260 <error: Cannot access memory at address 0xdf831260> from = 0x7f666ac86e78 fromlen = 16 ri = {src_ip = {af = 2, len = 4, u = {addrl = {140072621270026, 140723275665552}, addr32 = {852845578, 32613, 2967179408, 32764}, addr16 = {25610, 13013, 32613, 0, 37008, 45275, 32764, 0}, addr = "\nd\325\062e\177\000\000\220\220۰\374\177\000"}}, dst_ip = {af = 2, len = 4, u = {addrl = {3839926538, 0}, addr32 = {3839926538, 0, 0, 0}, addr16 = {41226, 58592, 0, 0, 0, 0, 0, 0}, addr = "\n\241\340\344", '\000' <repeats 11 times>}}, src_port = 5060, dst_port = 5060, proto_reserved1 = 0, proto_reserved2 = 0, src_su = {s = {sa_family = 2, sa_data = "\023\304\nd\325\062\000\000\000\000\000\000\000"}, sin = { sin_family = 2, sin_port = 50195, sin_addr = {s_addr = 852845578}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 50195, sin6_flowinfo = 852845578, sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}}, bind_address = 0x7f666aa23d78, proto = 1 '\001'} evp = {data = 0x0, rcv = 0x0, dst = 0x0} printbuf = "P\221۰\374\177\000\000\005\364<\337e\177\000\000 \221۰\374\177\000\000\265˧\337e\177\000\000\340\223۰\374\177\000\000L\221۰\374\177\000\000\340\223۰\004\000\000\000\360\005|\000\016\000\000\000n\365\023o\016\000\000\000\177$\000\000\000\000\000\000P\221۰\374\177\000\000(6\373\337e\177\000\000`\211A\000\000\000\000\000`\227۰\374\177", '\000' <repeats 18 times>, "\020\222۰\374\177\000\000\035\261]", '\000' <repeats 21 times>, "\300\022\262\000\000\000\000\000I/e\025\000\000\000\000`\211A\000\000\000\000\000`\227۰\374\177", '\000' <repeats 19 times>, "\222"... i = -537187496 j = 32613 l = 32613 __FUNCTION__ = "udp_rcv_loop" #2 0x0000000000424c4c in main_loop () at main.c:1628
This VM PCSCF node is created on OpenStack.
Regards
Hamid R. Hashmi
________________________________ From: Daniel-Constantin Mierla miconda@gmail.com Sent: Monday, December 7, 2020 5:53 PM To: Kamailio (SER) - Users Mailing List sr-users@lists.kamailio.org; Hamid Hashmi hamid2kviii@hotmail.com; Henning Westerholt hw@skalatan.de Subject: Re: [SR-Users] kamailio-5.2.0 Crash - udp_send failed
Hello,
get also the output of "bt full" from gdb.
I see you use ims moudules, they are known to have some limitations for various settings. Is this something that you can reproduce, or is something that happens from time to time without any pattern?
Cheers, Daniel
On 06.12.20 20:13, Hamid Hashmi wrote: Hello
I have updated Kamailio from 5.2.0 to 5.4.2 but the service is still crashing with sig 11 and sig 6. Please find the attached 'bt full' traces.
Sig 11:
Core was generated by `/usr/local/sbin/kamailio -f /usr/local/etc/kamailio_pcscf/kamailio.cfg -P /var/'. Program terminated with signal SIGSEGV, Segmentation fault. #0 0x00000000006a16fc in timer_list_expire (t=1339518316, h=0x7facee484fd0, slow_l=0x7facee488208, slow_mark=7936) at core/timer.c:846 846 _timer_rm_list(tl); /* detach */ (gdb) bt #0 0x00000000006a16fc in timer_list_expire (t=1339518316, h=0x7facee484fd0, slow_l=0x7facee488208, slow_mark=7936) at core/timer.c:846 #1 0x00000000006a1c19 in timer_handler () at core/timer.c:922 #2 0x00000000006a20ba in timer_main () at core/timer.c:961 #3 0x000000000042ac0a in main_loop () at main.c:1753 #4 0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856
Sig 6:
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Core was generated by `/usr/local/sbin/kamailio -f /usr/local/etc/kamailio_pcscf/kamailio.cfg -P /var/'. Program terminated with signal SIGABRT, Aborted. #0 0x00007fad95f56c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6 (gdb) bt #0 0x00007fad95f56c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6 #1 0x00007fad95f5a028 in abort () from /lib/x86_64-linux-gnu/libc.so.6 #2 0x000000000041e39d in sig_alarm_abort (signo=14) at main.c:687 #3 <signal handler called> #4 0x00007fad960183c7 in syscall () from /lib/x86_64-linux-gnu/libc.so.6 #5 0x00007fad6f5c024b in futex_get (lock=0x7facee70baa0) at ../../core/mem/../futexlock.h:121 #6 0x00007fad6f5c2167 in get_peer_by_fqdn (fqdn=0x7faceeb377b0) at peermanager.c:240 #7 0x00007fad6f5e6c31 in get_first_connected_route (cdp_session=0x7faceeb37768, r=0x7facee70a220, app_id=16777236, vendor_id=10415) at routing.c:110 #8 0x00007fad6f5ea8b2 in get_routing_peer (cdp_session=0x7faceeb37768, m=0x7facef617fb0) at routing.c:272 #9 0x00007fad6f5ac72d in AAASendMessage (message=0x7facef617fb0, callback_f=0x0, callback_param=0x0) at diameter_comm.c:137 #10 0x00007fad6ef05dbb in rx_send_str (rx_session_id=0x7facf0849d50) at rx_str.c:157 #11 0x00007fad6eeb25c4 in callback_dialog (dlg=0x7facf0036d70, type=8192, params=0x7fad72b2a1c0 <params>) at ims_qos_mod.c:453 #12 0x00007fad728973af in run_dlg_callbacks (type=8192, dlg=0x7facf0036d70, req=0x0, rpl=0x0, dir=0, dlg_data=0x0) at dlg_cb.c:274 #13 0x00007fad728d5315 in destroy_dlg (dlg=0x7facf0036d70) at dlg_hash.c:219 #14 0x00007fad728d5b1b in destroy_dlg_table () at dlg_hash.c:291 #15 0x00007fad7290b71e in mod_destroy () at ims_dialog.c:627 #16 0x0000000000616ae1 in destroy_modules () at core/sr_module.c:746 #17 0x000000000041ce17 in cleanup (show_status=1) at main.c:563 #18 0x000000000041e6ef in shutdown_children (sig=15, show_status=1) at main.c:706 #19 0x0000000000421755 in handle_sigs () at main.c:806 #20 0x000000000042bc85 in main_loop () at main.c:1817 #21 0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856
PS: I am using ubuntu 14.04.
Regards
Hamid R. Hashmi
________________________________ From: Henning Westerholt hw@skalatan.demailto:hw@skalatan.de Sent: Saturday, September 26, 2020 12:04 PM To: Kamailio (SER) - Users Mailing List sr-users@lists.kamailio.orgmailto:sr-users@lists.kamailio.org Cc: Hamid Hashmi hamid2kviii@hotmail.commailto:hamid2kviii@hotmail.com Subject: Re: kamailio-5.2.0 Crash - udp_send failed
Hello,
you are using a old and non released dev version of kamailio. These are meant only for development purposes.
Please update to a proper version (like the latest one based on 5.3 or 5.4 branch.
Then try to reproduce the bug again and report on the list or by creating an issue.
Cheers,
Henning
-- Henning Westerholt - https://skalatan.de/blog/ Kamailio services - https://skalatan.de/services
________________________________ From: sr-users sr-users-bounces@lists.kamailio.orgmailto:sr-users-bounces@lists.kamailio.org on behalf of Hamid Hashmi hamid2kviii@hotmail.commailto:hamid2kviii@hotmail.com Sent: Friday, September 25, 2020 9:28:07 PM To: sr-users@lists.kamailio.orgmailto:sr-users@lists.kamailio.org sr-users@lists.kamailio.orgmailto:sr-users@lists.kamailio.org Subject: [SR-Users] kamailio-5.2.0 Crash - udp_send failed