Hi all,

I have Kamailio running in GKE (Google Kubernetes Engine), and recently I have been seeing Kamailio crash frequently. I have not been able to find the reason for the crashes.

kamailio version

version: kamailio 5.4.9 (x86_64/linux) 7d022d
flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLACKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED
ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
id: 7d022d
compiled on 14:26:30 Jul 10 2024 with gcc 8.3.0

kamailio core generation:

Core was generated by `kamailio -e -E -DD -M 128 -m 2048'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  load_route_data_db (rd=0x7ca482a48840) at cr_db.c:416
416 if (rd->carriers[i]->domains[j]->sum_prob == 0.0) {

bt:

(gdb) bt
#0  load_route_data_db (rd=0x7ca482a48840) at cr_db.c:416
#1  0x00007ca474c8cc50 in reload_route_data () at cr_data.c:178
#2  0x00007ca474cabfdc in cr_rpc_reload_routes (rpc=0x7ca4f5a32aa0 <func_param>, c=0x7ca4f5a329c0 <_jsonrpc_ctx_global>) at cr_rpc.c:59
#3  0x00007ca4f5a158a1 in jsonrpc_exec_ex (cmd=0x7ffd774f34b0, rpath=0x0) at jsonrpcs_mod.c:1425
#4  0x00007ca4f5a15ccd in jsonrpc_exec (msg=0x7ca4f641a9f8, cmd=0x7ca4f61bde20 "x\335\033\366\244|", s2=0x0) at jsonrpcs_mod.c:1445
#5  0x00005745eb3fcb19 in do_action (h=0x7ffd774f3dc0, a=0x7ca4f61f1a50, msg=0x7ca4f641a9f8) at core/action.c:1085
#6  0x00005745eb40b35c in run_actions (h=0x7ffd774f3dc0, a=0x7ca4f61f0468, msg=0x7ca4f641a9f8) at core/action.c:1584
#7  0x00007ca474b92f8d in timer_handler (ticks=96802941, tl=0x7ca47b2f7950, data=0x7ca47b2f7868) at timer.c:216
#8  0x00005745eb49e3c4 in slow_timer_main () at core/timer.c:1105
#9  0x00005745eb3a85bd in main_loop () at main.c:1765
#10 0x00005745eb3b34c3 in main (argc=8, argv=0x7ffd774f4688) at main.c:2942


bt full:

(gdb) bt full
#0  load_route_data_db (rd=0x7ca482a48840) at cr_db.c:416
        res = 0x7ca4f636a4a8
        row = 0x7ca4f642a7f8
        i = 0
        j = 110
        ret = 56
        tmp_carrier_data = 0x7ca479ff5068
        query_str = {s = 0x7ca474cdd800 <query> "SELECT DISTINCT domain FROM carrierroute WHERE carrier=8", len = 56}
        tmp_scan_prefix = {s = 0x5745ed889f68 "13235791374", len = 11}
        tmp_rewrite_host = {s = 0x5745ed889f7a "sip.telnyx.com;transport=TLS", len = 28}
        tmp_rewrite_prefix = {s = 0x5745ed889f99 "", len = 0}
        tmp_rewrite_suffix = {s = 0x5745ed889f9a "", len = 0}
        tmp_host_name = {s = 0x40 <error: Cannot access memory at address 0x40>, len = 1965559808}
        tmp_reply_code = {s = 0x7ca474ccf2b0 <__func__.5843> "reload_route_data", len = 1959587872}
        tmp_comment = {s = 0x5745ed889f9b "IMPORTED  - WTJHSX", len = 18}
        p_tmp_comment = 0x7ffd774f30b0
        __func__ = "load_route_data_db"
        n = 0
#1  0x00007ca474c8cc50 in reload_route_data () at cr_data.c:178
        old_data = 0x5745ed850150
        new_data = 0x7ca482a48840
        i = 22341
        __func__ = "reload_route_data"
#2  0x00007ca474cabfdc in cr_rpc_reload_routes (rpc=0x7ca4f5a32aa0 <func_param>, c=0x7ca4f5a329c0 <_jsonrpc_ctx_global>) at cr_rpc.c:59
        __func__ = "cr_rpc_reload_routes"
#3  0x00007ca4f5a158a1 in jsonrpc_exec_ex (cmd=0x7ffd774f34b0, rpath=0x0) at jsonrpcs_mod.c:1425
        rpce = 0x7ca4f6425fb8
        ctx = 0x7ca4f5a329c0 <_jsonrpc_ctx_global>
        ret = -1
        nj = 0x0
        val = {s = 0x5745ed845010 "cr.reload_routes", len = 16}
        scmd = {s = 0x7ca4f61bdd78 "{\"jsonrpc\": \"2.0\", \"method\": \"cr.reload_routes\", \"id\": 2}", len = 57}
        __func__ = "jsonrpc_exec_ex"
#4  0x00007ca4f5a15ccd in jsonrpc_exec (msg=0x7ca4f641a9f8, cmd=0x7ca4f61bde20 "x\335\033\366\244|", s2=0x0) at jsonrpcs_mod.c:1445
        scmd = {s = 0x7ca4f61bdd78 "{\"jsonrpc\": \"2.0\", \"method\": \"cr.reload_routes\", \"id\": 2}", len = 57}
        __func__ = "jsonrpc_exec"
#5  0x00005745eb3fcb19 in do_action (h=0x7ffd774f3dc0, a=0x7ca4f61f1a50, msg=0x7ca4f641a9f8) at core/action.c:1085
        ret = -5
        v = 0
        dst = {send_sock = 0x0, to = {s = {sa_family = 41744, sa_data = "6\366\244|\000\000\035u\271t\244|\000"}, sin = {sin_family = 41744, sin_port = 63030, sin_addr = {s_addr = 31908}, sin_zero = "\035u\271t\244|\000"},
            sin6 = {sin6_family = 41744, sin6_port = 63030, sin6_flowinfo = 31908, sin6_addr = {__in6_u = {__u6_addr8 = "\035u\271t\244|\000\000\005u\271t\244|\000", __u6_addr16 = {29981, 29881, 31908, 0, 29957, 29881,
                    31908, 0}, __u6_addr32 = {1958311197, 31908, 1958311173, 31908}}}, sin6_scope_id = 2001680800}, sas = {ss_family = 41744,
              __ss_padding = "6\366\244|\000\000\035u\271t\244|\000\000\005u\271t\244|\000\000\240\071Ow\375\177\000\000/.:\365\244|\000\000\320:Ow\375\177\000\000\327\337j\353EW\000\000\320\071Ow\375\177\000\000\020@\016\366\244|\000\000\030\245\066\366\244|\000\000\020@\016\366\244|\000\000\020:Ow\375\177\000\000\356u\236\365\244|\000\000\030\245\066\366\244|\000\000\000\000\000\000\000\000\000", __ss_align = 134217728}}, id = 130848392,
          send_flags = {f = 0, blst_imask = 0}, proto = 56 '8', proto_pad0 = 5 '\005', proto_pad1 = 36}
        tmp = 0x7ca4f641a9f8 "\344\032"
        new_uri = 0xed3 <error: Cannot access memory at address 0xed3>
        end = 0x7ca474b9751b "\r\n"
        crt = 0x2 <error: Cannot access memory at address 0x2>
        cmd = 0x7ca4f617b7d8
        len = 0
        user = 15
        uri = {user = {s = 0x9774f3800 <error: Cannot access memory at address 0x9774f3800>, len = 621}, passwd = {s = 0x400000000 <error: Cannot access memory at address 0x400000000>, len = 1721781302}, host = {s = 0x0,
            len = -2147483648}, port = {s = 0x8000000 <error: Cannot access memory at address 0x8000000>, len = -343946861}, params = {s = 0x7ffd774f38e0 "09Ow\375\177", len = -167557169}, sip_params = {
            s = 0x7ca4f53704f0 <dialog_table_name> "l^5\365\244|", len = -164192016}, headers = {s = 0x1700000000 <error: Cannot access memory at address 0x1700000000>, len = 2001681040}, port_no = 14800, proto = 30543,
          type = 32765, flags = (unknown: 4130775280), transport = {s = 0x0, len = 2009201968}, ttl = {s = 0x6774f3a50 <error: Cannot access memory at address 0x6774f3a50>, len = -181058744}, user_param = {
            s = 0x17200000000 <error: Cannot access memory at address 0x17200000000>, len = 2099608304}, maddr = {s = 0x7ffd774f3960 "\035u\271t\244|", len = -344235896}, method = {s = 0x0, len = -2147483648}, lr = {
--Type <RET> for more, q to quit, c to continue without paging--
            s = 0x8000000 <error: Cannot access memory at address 0x8000000>, len = -343946861}, r2 = {s = 0x7ffd774f38d0 "`9Ow\375\177", len = -166931844}, gr = {s = 0x7ca4f53704f0 <dialog_table_name> "l^5\365\244|",
            len = -164192016}, transport_val = {s = 0x7ffd774f3960 "\035u\271t\244|", len = -181636538}, ttl_val = {s = 0x7ffd774f3930 "`9Ow\375\177", len = -167555326}, user_param_val = {s = 0x0, len = 2}, maddr_val = {
            s = 0x7ca4f60c1f77 <db_mysql_submit_query> "UH\211\345AWAVAUATSH\201\354\210", len = -166889584}, method_val = {s = 0x177d6038c8 <error: Cannot access memory at address 0x177d6038c8>, len = 2001681040},
          lr_val = {s = 0xe00000000 <error: Cannot access memory at address 0xe00000000>, len = 1958311183}, r2_val = {s = 0x7ffd774f3960 "\035u\271t\244|", len = -164191464}, gr_val = {
            s = 0x7ffd774f39f0 "\373\377\377\377", len = -344973614}}
        next_hop = {user = {s = 0x8000000 <error: Cannot access memory at address 0x8000000>, len = -343946861}, passwd = {s = 0x7ffd774f3720 "\020\070Ow\375\177", len = -166975523}, host = {
            s = 0x7ffd774f3740 "\020\070Ow\375\177", len = -166887566}, port = {s = 0x7ca4f60431d0 <sql_str> "@@w\355EW", len = -164192016}, params = {s = 0x7ffd774f3d50 "", len = -164192016}, sip_params = {s = 0x0,
            len = -30810208}, headers = {s = 0x0, len = -181051830}, port_no = 8055, proto = 62988, type = 31908, flags = (unknown: 4128077712), transport = {
            s = 0x4774f37d0 <error: Cannot access memory at address 0x4774f37d0>, len = 2001680544}, ttl = {s = 0x669fa379 <error: Cannot access memory at address 0x669fa379>, len = -164192016}, user_param = {
            s = 0x5745ed7743b0 "'sip:+13194676789@us.test.com',NULL,0)", len = 1721781302}, maddr = {s = 0x400000000 <error: Cannot access memory at address 0x400000000>, len = -167513741}, method = {
            s = 0x8000000 <error: Cannot access memory at address 0x8000000>, len = -343946861}, lr = {s = 0x7ffd774f3810 "\340\070Ow\375\177", len = -167567737}, r2 = {
            s = 0x8000000 <error: Cannot access memory at address 0x8000000>, len = -343946861}, gr = {s = 0x7ffd774f3810 "\340\070Ow\375\177", len = -166977673}, transport_val = {s = 0x7ca4f60431d0 <sql_str> "@@w\355EW",
            len = -164192016}, ttl_val = {s = 0x7ffd774f3a90 "\330\267\027\366\244|", len = 23}, user_param_val = {
            s = 0x5745ed77417a "3071,3795,'6e1a9c34-cec5-4ebb-bcd9-3356d80bcf9e','sip:+13194004696@sip.telnyx.com','c5SS3F27B8rDN','sip:+13194676789@us.test.com','f9b9b2c48ef41ef90eb05056b0a794','tls:10.16.0.74:520"..., len = -164192016}, maddr_val = {s = 0x7ca4f60dcf18 "`", len = -31060352}, method_val = {s = 0x0, len = -180627954}, lr_val = {s = 0x7ca4f60c1f77 <db_mysql_submit_query> "UH\211\345AWAVAUATSH\201\354\210",
            len = -166889584}, r2_val = {s = 0x0, len = 0}, gr_val = {s = 0x0, len = 0}}
        u = 0x7ca4f5817bc8
        port = 0
        dst_host = 0x7ca474b9751d
        i = -344941541
        flags = 0
        avp = 0x7ffd774f3c40
        st = {flags = 3984015424, id = 22341, name = {n = -310010607, s = {s = 0x5745ed859d11 "", len = 2001679968}, re = 0x5745ed859d11}, avp = 0x66a04c36}
        sct = 0x60f60e6120
        sjt = 0x5f60e6a50
        rve = 0x8000000
        mct = 0x0
        rv = 0xffffffff
        rv1 = 0x5745eb7fc993
        c1 = {cache_type = 10, val_type = RV_NONE, c = {avp_val = {n = 0, s = {s = 0x0, len = 0}, re = 0x0}, pval = {rs = {s = 0x0, len = 0}, ri = 2001679912, flags = 32765}},
          i2s = "`6Ow\375\177\000\000\031\241\a\366\244|\000\000\250\003\000\000\000"}
        s = {s = 0xffffffff <error: Cannot access memory at address 0xffffffff>, len = -164179120}
        srevp = {0x3, 0x64870454268b4200}
        evp = {data = 0x0, obuf = {s = 0x0, len = 0}, rcv = 0x0, dst = 0x0, req = 0x0, rpl = 0x0, rplcode = 0, mode = 0}
        mod_f_params = {{type = NOSUBTYPE, u = {number = 0, string = 0x0, str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}, {type = NOSUBTYPE, u = {number = 0, string = 0x0, str = {s = 0x0, len = 0},
              data = 0x0, attr = 0x0, select = 0x0}}, {type = NOSUBTYPE, u = {number = 0, string = 0x0, str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}, {type = NOSUBTYPE, u = {number = 0, string = 0x0,
              str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}, {type = NOSUBTYPE, u = {number = 0, string = 0x0, str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}, {type = NOSUBTYPE, u = {
              number = 0, string = 0x0, str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}, {type = NOSUBTYPE, u = {number = 0, string = 0x0, str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}, {
            type = NOSUBTYPE, u = {number = 0, string = 0x0, str = {s = 0x0, len = 0}, data = 0x0, attr = 0x0, select = 0x0}}}
        __func__ = "do_action"
#6  0x00005745eb40b35c in run_actions (h=0x7ffd774f3dc0, a=0x7ca4f61f0468, msg=0x7ca4f641a9f8) at core/action.c:1584
        t = 0x7ca4f61f1a50
        ret = 1
        tvb = {tv_sec = 0, tv_usec = 0}
        tve = {tv_sec = 0, tv_usec = 0}
        tz = {tz_minuteswest = 60, tz_dsttime = 0}
        tdiff = 4131498432
        __func__ = "run_actions"
#7  0x00007ca474b92f8d in timer_handler (ticks=96802941, tl=0x7ca47b2f7950, data=0x7ca47b2f7868) at timer.c:216
        msg = 0x7ca4f641a9f8
        a = 0x7ca47b2f7868
        ra_ctx = {rec_lev = 1, run_flags = 0, last_retcode = 1, jmp_env = {{__jmpbuf = {95957815091336, 8333654393230933038, 0, 2147483648, 134217728, 95957815380371, 8333654393012829230, 2510245087605373998},
              __mask_was_saved = 0, __saved_mask = {__val = {137047930071236, 0, 25985340079663656, 140726605135616, 137045787154760, 70721158768, 137045787160648, 140726605135472, 137045776334895, 1095350878208,
                  137045782276504, 140735195070144, 140726605135504, 137045776144708, 0, 140726605135552}}}}}
        keng = 0x0
        evname = {s = 0x7ca474b9715a "timer", len = 5}
        __func__ = "timer_handler"
--Type <RET> for more, q to quit, c to continue without paging--
#8  0x00005745eb49e3c4 in slow_timer_main () at core/timer.c:1105
        n = 12
        ret = 4294967295
        tl = 0x7ca47b2f7950
        i = 1010
        __func__ = "slow_timer_main"
#9  0x00005745eb3a85bd in main_loop () at main.c:1765
        i = 16
        pid = 0
        si = 0x0
        si_desc = "udp receiver child=15 sock=10.16.0.74:5101 (35.202.133.184:5101)\000AOw\375\177\000\000d\366\023\376\244|\000\000 \000\000\000\060\000\000\000@AOw\375\177\000\000\200@Ow\375\177\000\000\000B\213&T\004\207d\223\311\177\353EW\000\000\240\274\065\366\244|\000"
        nrprocs = 16
        woneinit = 1
        __func__ = "main_loop"
#10 0x00005745eb3b34c3 in main (argc=8, argv=0x7ffd774f4688) at main.c:2942
        cfg_stream = 0x5745ed74d2a0
        c = -1
        r = 0
        tmp = 0x7ffd774f5d9f ""
        tmp_len = 0
        port = 0
        proto = 0
        ahost = 0x0
        aport = 0
        options = 0x5745eb7b8d78 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
        ret = -1
        seed = 1555962608
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 2
        n_lst = 0x7ffd77580248
        p = 0x7ca4fe189995 <handle_intel+197> "H\205\300uf\213t$\004M\215\216\244"
        st = {st_dev = 0, st_ino = 0, st_nlink = 0, st_mode = 0, st_uid = 0, st_gid = 0, __pad0 = 0, st_rdev = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0}, st_mtim = {tv_sec = 0,
            tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, __glibc_reserved = {0, 0, 0}}
        tbuf = "\b\000\000\000\000\000\000\000\210FOw\375\177\000\000\320FOw\375\177\000\000\330+,\376\244|\000\000\000\000\000\000\000\000\000\000\020\vH\376\244|\000\000\001", '\000' <repeats 19 times>, " \000\000\000(&G\376\244|\000\000\320EOw\375\177\000\000\312tH\376\244|\000\000\000\000\000\000\000\000\000\000@e*\376\244|\000\000\320\263+\376\244|\000\000\364EOw\375\177\000\000\006", '\000' <repeats 15 times>, "/", '\000' <repeats 39 times>, "\200\037\000\000\377\377", '\000' <repeats 186 times>...
        option_index = 0
        long_options = {{name = 0x5745eb7baf46 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x5745eb7b6524 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x5745eb7baf4b "alias", has_arg = 1, flag = 0x0,
            val = 1024}, {name = 0x5745eb7baf51 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x5745eb7baf57 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x5745eb7baf60 "substdefs", has_arg = 1,
            flag = 0x0, val = 1027}, {name = 0x5745eb7baf6a "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x5745eb7baf74 "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x5745eb7baf7f "modparam",
            has_arg = 1, flag = 0x0, val = 1030}, {name = 0x5745eb7baf88 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {name = 0x5745eb7baf93 "debug", has_arg = 1, flag = 0x0, val = 1032}, {
            name = 0x5745eb7baf99 "atexit", has_arg = 1, flag = 0x0, val = 1034}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}}
        __func__ = "main"


(gdb) list
411 }
412 } while(RES_ROW_N(res) > 0);
413
414 for (i = 0; i < rd->carrier_num; ++i) {
415 for (j = 0; j < rd->carriers[i]->domain_num; ++j) {
416 if (rd->carriers[i]->domains[j]->sum_prob == 0.0) {
417 LM_ERR("All routes with carrier id %d (%.*s) and domain id %d (%.*s) have probability 0.\n",
418 rd->carriers[i]->id, rd->carriers[i]->name->len, rd->carriers[i]->name->s,
419 rd->carriers[i]->domains[j]->id, rd->carriers[i]->domains[j]->name->len, rd->carriers[i]->domains[j]->name->s);
420 }


Please help me understand the reason for the crash and how to fix the problem.

Thank you in advance.

Regards

Maharaja Azhagiah