Steps: (1) 9 users and 1 mcu registered the server (2) All 9 users connect to the mcu through the server (3) For a while, some users showed registration fail, we found Kamailio does not respond to any further SIP message.
I checked this using `netstat -apn` and it showed:
``` Active Internet connections (servers and established) Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name tcp 0 0 10.100.92.42:5060 0.0.0.0:* LISTEN 9384/kamailio tcp 0 0 10.100.92.42:5061 0.0.0.0:* LISTEN 9384/kamailio tcp 48963 0 10.100.92.42:5060 10.100.92.42:42177 ESTABLISHED 9375/kamailio tcp 29621 0 10.100.92.42:5060 10.100.92.40:34392 ESTABLISHED 9375/kamailio tcp 1617 0 10.100.92.42:43997 10.100.92.42:5062 ESTABLISHED 9375/kamailio tcp 2506 0 10.100.92.42:5060 10.100.92.68:37386 ESTABLISHED 9383/kamailio udp 0 0 127.0.0.1:50180 127.0.0.1:7722 ESTABLISHED 9384/kamailio udp 0 0 127.0.0.1:47125 127.0.0.1:7722 ESTABLISHED 9378/kamailio udp 0 0 127.0.0.1:55318 127.0.0.1:7722 ESTABLISHED 9375/kamailio udp 0 0 127.0.0.1:44595 127.0.0.1:7722 ESTABLISHED 9382/kamailio udp 0 0 127.0.0.1:41045 127.0.0.1:7722 ESTABLISHED 9383/kamailio udp 0 0 127.0.0.1:52901 127.0.0.1:7722 ESTABLISHED 9380/kamailio udp 0 0 127.0.0.1:39078 127.0.0.1:7722 ESTABLISHED 9376/kamailio udp 0 0 127.0.0.1:36548 127.0.0.1:7722 ESTABLISHED 9377/kamailio udp 0 0 127.0.0.1:55005 127.0.0.1:7722 ESTABLISHED 9374/kamailio udp 0 0 127.0.0.1:53049 127.0.0.1:7722 ESTABLISHED 9381/kamailio udp 0 0 127.0.0.1:44358 127.0.0.1:7722 ESTABLISHED 9374/kamailio udp 0 0 127.0.0.1:60253 127.0.0.1:7722 ESTABLISHED 9379/kamailio udp 0 0 10.100.92.42:5060 0.0.0.0:* 9374/kamailio udp 0 0 ::ffff:127.0.0.1:7722 :::* 9354/rtpengine raw 0 0 0.0.0.0:255 0.0.0.0:* 255 9374/kamailio ```
And I attached the gdb to the those processed which Recv-Q value were high, it showed:
PID [9375] ``` #0 0x400a7450 in recvfrom () from /lib/libpthread.so.0 No symbol table info available. #1 0x00265848 in udp_rcv_loop () at udp_server.c:446 len = 472 buf = "REGISTER sip:10.100.92.42:5060 SIP/2.0\r\nVia: SIP/2.0/UDP 10.100.91.13:5060;rport;branch=z9hG4bK649620372\r\nFrom: sip:DEMOROOM_2@10.100.92.42;tag=658334621\r\nTo: sip:DEMOROOM_2@10.100.92.42\r\nCall-ID:"... tmp = 0x49fbfc "10.100.92.43" from = 0x402d5b24 fromlen = 16 ri = {src_ip = {af = 2, len = 4, u = {addrl = {224093194, 3200776404, 1935216, 1734536862}, addr32 = {224093194, 3200776404, 1935216, 1734536862}, addr16 = {25610, 3419, 63700, 48839, 34672, 29, 61086, 26466}, addr = "\nd[\r\324\370\307\276p\207\035\000\236\356bg"}}, dst_ip = {af = 2, len = 4, u = {addrl = {710697994, 0, 0, 0}, addr32 = {710697994, 0, 0, 0}, addr16 = {25610, 10844, 0, 0, 0, 0, 0, 0}, addr = "\nd\*", '\000' <repeats 11 times>}}, src_port = 5060, dst_port = 5060, proto_reserved1 = 0, proto_reserved2 = 0, src_su = {s = {sa_family = 2, sa_data = "\023\304\nd[\r\000\000\000\000\000\000\000"}, sin = { sin_family = 2, sin_port = 50195, sin_addr = {s_addr = 224093194}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 50195, sin6_flowinfo = 224093194, sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}}, bind_address = 0x40261e14, proto = 1 '\001'} __FUNCTION__ = "udp_rcv_loop" #2 0x000c90a0 in main_loop () at main.c:1629 i = 0 pid = 0 si = 0x40261e14 si_desc = "udp receiver child=0 sock=10.100.92.42:5060\000\244\372\307\276\304y2A\000\000,\000\000\000X\000 \000\062@ \313'@h\t;\000\230\325&@\000\000\000\000x\021\002\000\060K1@\000\000\000\000d\372\307\276\260\275'\000\230\331\070\000X\311\070\000\030\000\000\000\000PVA\000\000\000\000\000\000\a@Lu\223A" r = 1074200576 nrprocs = 1 __FUNCTION__ = "main_loop" #3 0x000d0830 in main (argc=5, argv=0xbec7fd04) at main.c:2581 cfg_stream = 0x4b5008 c = -1 r = 0 tmp = 0x40070ac4 "P\002\006@\a" tmp_len = -1094190232 port = 1074045560 proto = -1094190164 options = 0x383880 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:" ret = -1 seed = 3985271057 rfd = 206 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x0 p = 0x5 <Address 0x5 out of bounds> __FUNCTION__ = "main" ```
PID [9383]
``` #0 0x400a6a04 in __lll_lock_wait () from /lib/libpthread.so.0 #1 0x400a10b8 in pthread_mutex_lock () from /lib/libpthread.so.0 #2 0x002145b4 in tcpconn_do_send (fd=21, c=0x41969534, buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP 10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom: sip:10.100.92.42:42177;tag=1996784611\r\nTo: sip:10.100.92.42:5060;tag=e926"..., len=340, send_flags=..., resp=0xbec7daf8, locked=0) at tcp_main.c:2520 #3 0x002130f0 in tcpconn_send_put (c=0x41969534, buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP 10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom: sip:10.100.92.42:42177;tag=1996784611\r\nTo: sip:10.100.92.42:5060;tag=e926"..., len=340, send_flags=...) at tcp_main.c:2377 #4 0x002105c0 in tcp_send (dst=0xbec7e38c, from=0x0, buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP 10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom: sip:10.100.92.42:42177;tag=1996784611\r\nTo: sip:10.100.92.42:5060;tag=e926"..., len=340) at tcp_main.c:2133 #5 0x40e33164 in msg_send (dst=0xbec7e38c, buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP 10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom: sip:10.100.92.42:42177;tag=1996784611\r\nTo: sip:10.100.92.42:5060;tag=e926"..., len=340) at ../../forward.h:225 #6 0x40e362e8 in sl_reply_helper (msg=0x40342814, code=200, reason=0x402e1d6c "Keepalive", tag=0x0) at sl_funcs.c:224 #7 0x40e373e0 in sl_send_reply (msg=0x40342814, code=200, reason=0x402e1d6c "Keepalive") at sl_funcs.c:327 #8 0x40e2b30c in w_sl_send_reply (msg=0x40342814, p1=0x40282108 "\244\324(@\001", p2=0x40282140 "d\320(@ ") at sl.c:236 #9 0x00029b90 in do_action (h=0xbec7f110, a=0x4028cf34, msg=0x40342814) at action.c:1100 #10 0x00036ce8 in run_actions (h=0xbec7f110, a=0x4028cf34, msg=0x40342814) at action.c:1583 #11 0x00029a2c in do_action (h=0xbec7f110, a=0x4028d120, msg=0x40342814) at action.c:1079 #12 0x00036ce8 in run_actions (h=0xbec7f110, a=0x40288ed8, msg=0x40342814) at action.c:1583 #13 0x000263ac in do_action (h=0xbec7f110, a=0x40279db8, msg=0x40342814) at action.c:712 #14 0x00036ce8 in run_actions (h=0xbec7f110, a=0x4027321c, msg=0x40342814) at action.c:1583 #15 0x00037538 in run_top_route (a=0x4027321c, msg=0x40342814, c=0x0) at action.c:1669 #16 0x0013ce80 in receive_msg ( buf=0x41969714 "OPTIONS sip:10.100.92.42:5060 SIP/2.0\r\nVia: SIP/2.0/TCP 10.100.92.42:5062;rport;branch=z9hG4bK416855936\r\nFrom: sip:10.100.92.42:42177;tag=1996784611\r\nTo: sip:10.100.92.42:5060\r\nCall-ID: 2059319404"..., len=404, rcv_info=0x4196955c) at receive.c:216 #17 0x00248fbc in receive_tcp_msg ( tcpbuf=0x41969714 "OPTIONS sip:10.100.92.42:5060 SIP/2.0\r\nVia: SIP/2.0/TCP 10.100.92.42:5062;rport;branch=z9hG4bK416855936\r\nFrom: sip:10.100.92.42:42177;tag=1996784611\r\nTo: sip:10.100.92.42:5060\r\nCall-ID: 2059319404"..., len=404, rcv_info=0x4196955c, con=0x41969534) at tcp_read.c:1230 #18 0x0024c484 in tcp_read_req (con=0x41969534, bytes_read=0xbec7f714, read_flags=0xbec7f70c) at tcp_read.c:1431 #19 0x0024fdd8 in handle_io (fm=0x403321ac, events=1, idx=-1) at tcp_read.c:1655 #20 0x0023f814 in io_wait_loop_epoll (h=0x47d90c, t=2, repeat=0) at io_wait.h:1073 #21 0x00250fc8 in tcp_receive_loop (unix_sock=22) at tcp_read.c:1764 #22 0x00234674 in tcp_init_children () at tcp_main.c:4879 #23 0x000c9dec in main_loop () at main.c:1714 #24 0x000d0830 in main (argc=5, argv=0xbec7fd04) at main.c:2581 ```
In process 9375, I found it got stuck in recvfrom () (udp_server.c:446), and the code is:
`len=recvfrom(bind_address->socket, buf, BUF_SIZE, 0, &from->s,&fromlen);`
It seems operate using blocking mode instead of a non-blocking one, why??? Or maybe I miss a packet-check function like select() or poll()??
Thanks in advance. :)
--- Reply to this email directly or view it on GitHub: https://github.com/kamailio/kamailio/issues/510