Description

This is happened during load test of kamailio 5.7.

Troubleshooting

Reproduction

This cannot be reproduced at will.
I have 4 load tests environments running UDP and TLS load tests.
The environments run the same tests. One of the the kamailio instances crashed after 20 days.

Debugging Data

I got two core files:


[root@lab002109-flip-server ~]$ ls -l /core*
-rw------- 1 root root  671449088 Dec 16 20:58 /core.1623673
-rw------- 1 root root 1052164096 Dec 16 20:57 /core.1623683


[root@lab002109-flip-server ~]$ gdb /usr/local/src/git/kamailio-master/src/kamailio /core.1623683
GNU gdb (Debian 10.1-1.7) 10.1.90.20210103-git
Copyright (C) 2021 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
    <http://www.gnu.org/software/gdb/documentation/>.

For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from /usr/local/src/git/kamailio-master/src/kamailio...

warning: Can't open file /dev/zero (deleted) during file-backed mapping note processing
[New LWP 1623683]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `/usr/local/src/git/kamailio-master/src/kamailio -m 991 -f /usr/local/etc/kamail'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:934
934	../sysdeps/x86_64/multiarch/memcmp-sse4.S: No such file or directory.
(gdb) bt full
#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:934
No locals.
#1  0x00007ff7a8331071 in __tzstring_len (s=0x561ac1e60143 "b\004k,\037wnG\036\377\342;\245\240", len=14) at tzset.c:93
        p = 0x14480bd1886e4261 <error: Cannot access memory at address 0x14480bd1886e4261>
        t = 0x561ac1e64180
        u = 0x561ac1e64180
        new = <optimized out>
#2  0x00007ff7a8331765 in __tzstring (s=<optimized out>) at tzset.c:121
No locals.
#3  0x00007ff7a83330b7 in __tzfile_compute (timer=timer@entry=1671191315, use_localtime=use_localtime@entry=1, leap_correct=leap_correct@entry=0x7ffcc0288070, leap_hit=leap_hit@entry=0x7ffcc028806c, 
    tp=tp@entry=0x7ffcc02880d0) at tzfile.c:599
        info = <optimized out>
        i = 0
        __PRETTY_FUNCTION__ = "__tzfile_compute"
#4  0x00007ff7a8331c99 in __tz_convert (timer=1671191315, use_localtime=use_localtime@entry=1, tp=tp@entry=0x7ffcc02880d0) at tzset.c:580
        leap_correction = 5
        leap_extra_secs = 32764
#5  0x00007ff7a8330030 in __localtime_r (t=t@entry=0x7ffcc02880b8, tp=tp@entry=0x7ffcc02880d0) at localtime.c:30
No locals.
#6  0x00007ff7a836e17f in __vsyslog_internal (pri=-1071087408, fmt=0x561ac0da95f0 "%s: %.*s%s%s%serror reading: %s (%d) ([%s]:%u ->", ap=0x7ffcc02881b0, mode_flags=0) at ../misc/syslog.c:209
        now_tm = {tm_sec = 1671191315, tm_min = 0, tm_hour = 193484271, tm_mday = 0, tm_mon = 0, tm_year = 0, tm_wday = -1487758381, tm_yday = 32759, tm_isdst = 0, tm_gmtoff = -3618958850589909760, 
          tm_zone = 0x7ffcc0288130 "p\201(\300\374\177"}
        now = 1671191315
        fd = <optimized out>
        f = 0x561ac1e6b380
        buf = 0x0
        bufsize = 0
        msgoff = <optimized out>
        saved_errno = 104
        failbuf = "\260%\rj\367\177\000\000o\033@\250\367\177\000\000\220\202(\300\374\177\000\000\037\301*\250", <incomplete sequence \367>
        clarg = {buf = <optimized out>, oldaction = <optimized out>}
#7  0x00007ff7a836e6d6 in __syslog (pri=<optimized out>, fmt=<optimized out>) at ../misc/syslog.c:117
        ap = {{gp_offset = 16, fp_offset = 48, overflow_arg_area = 0x7ffcc0288290, reg_save_area = 0x7ffcc02881d0}}
#8  0x0000561ac0bc5681 in tcp_read_data (fd=10, c=0x7ff769ce5b80, 
    buf=0x7ffcc02984b0 "\027\003\003\002\\ީ\241O\275\332\373\350\362y\323E\347 %Ϊ\265Z\245\375rp\207\250\r\243\234\241E\342?\251\274$\363\256f\363\374\326\335[\275/%\330GO\au\025F\217\202\352J\026H\f \205\322\020C-<,\315A'S\316\350x\207Ƣ\001\062\360]\317\340鈪\326Kz\376\224\224\202K\005\221\064\370\f\340fC\242m\351\064\271\253A\242\033\223n\302R\231\370n\312\027\241J\361(\037\260@\273\273+2jO\253\063\301\362\347\374\361\311A\206\004\372\025\203\063\021\027\rGXϕ\225\236\216*V\252؉\216\245\025f\226\205\346H\276#\314<2m\b\033I\350\067\270A'\255\022\242\371\206\361h>]"..., b_size=16383, 
    flags=0x7ffcc02a8910) at core/tcp_read.c:269
        __llevel = -1
        bytes_read = -1
        __func__ = "tcp_read_data"
#9  0x00007ff7a63bf382 in tls_h_read_f (c=0x7ff769ce5b80, flags=0x7ffcc02a8910) at tls_server.c:1112
        r = 0x7ff769ce5ca8
        bytes_free = 16383
        bytes_read = 609
        read_size = 16383
        ssl_error = 2
        ssl_read = 0
        ssl = 0x0
        rd_buf = "\027\003\003\002\\ީ\241O\275\332\373\350\362y\323E\347 %Ϊ\265Z\245\375rp\207\250\r\243\234\241E\342?\251\274$\363\256f\363\374\326\335[\275/%\330GO\au\025F\217\202\352J\026H\f \205\322\020C-<,\315A'S\316\350x\207Ƣ\001\062\360]\317\340鈪\326Kz\376\224\224\202K\005\221\064\370\f\340fC\242m\351\064\271\253A\242\033\223n\302R\231\370n\312\027\241J\361(\037\260@\273\273+2jO\253\063\301\362\347\374\361\311A\206\004\372\025\203\063\021\027\rGXϕ\225\236\216*V\252؉\216\245\025f\226\205\346H\276#\314<2m\b\033I\350\067\270A'\255\022\242\371\206\361h>]\331|\316{\032;r"...
        wr_buf = "\027\003\003\000\372\371\326k\301\036\231\226\065\265\324y\030\242M@\366\311\066\v\f\351e\231\360\037\250~SQ~^\242\253\314tebbޤh\b\331\364f\272\373\f.\252\375qq@\000M\374\353\352\326\064dL\243!\213\373\261\n\340\271p\377\221\206e\ḇ\327\324ӟ]aH\240\364\256\036\327\333\003z\373\204\265f\350ٿ\220\273\202/\023\006\245d%\366_\020M\300(\304CX\355\241\177\277\210.\331BGN\207@\360\020v\36--Type <RET> for more, q to quit, c to continue without paging--
6m쳮\200\342J\250]\364\205\321\302ZC\310f\366\314\365\063\063\344?R\213\366\253\342]o\247\206{\225Լ\355\305\361\031\357\233?\006d\232Ni\001\301n\t\023?\243\351\231=\210\345\023.i\225\241d"...
        rd = {
          buf = 0x7ffcc02984b0 "\027\003\003\002\\ީ\241O\275\332\373\350\362y\323E\347 %Ϊ\265Z\245\375rp\207\250\r\243\234\241E\342?\251\274$\363\256f\363\374\326\335[\275/%\330GO\au\025F\217\202\352J\026H\f \205\322\020C-<,\315A'S\316\350x\207Ƣ\001\062\360]\317\340鈪\326Kz\376\224\224\202K\005\221\064\370\f\340fC\242m\351\064\271\253A\242\033\223n\302R\231\370n\312\027\241J\361(\037\260@\273\273+2jO\253\063\301\362\347\374\361\311A\206\004\372\025\203\063\021\027\rGXϕ\225\236\216*V\252؉\216\245\025f\226\205\346H\276#\314<2m\b\033I\350\067\270A'\255\022\242\371\206\361h>]"..., pos = 0, 
          used = 0, size = 65536}
        wr = {buf = 0x7ffcc02884b0 "\027\003\003", pos = 0, used = 0, size = 65536}
        tls_c = 0x7ff769dcb410
        enc_rd_buf = 0x0
        n = -1
        flush_flags = 1
        err_src = 0x7ff7a63dc9a0 "TLS read:"
        ip_buf = '\000' <repeats 63 times>
        x = 8208
        tls_dbg = -1471984768
        __func__ = "tls_h_read_f"
#10 0x0000561ac0bc6f86 in tcp_read_headers (c=0x7ff769ce5b80, read_flags=0x7ffcc02a8910) at core/tcp_read.c:441
        bytes = 587
        remaining = 0
        p = 0x7ff769ce6173 "eM/11UlxzYsYOYZZ\",response=\"64173fa507e640f18f83898839e540e0\",algorithm=MD5\r\nExpires: 3600\r\nAllow: INVITE,CANCEL,ACK,BYE,NOTIFY,REFER,OPTIONS,INFO,MESSAGE\r\nUser-Agent: Acrobits SIPIS\r\nContent-Length: "...
        r = 0x7ff769ce5ca8
        mc = 32759
        body_len = 27086
        mfline = 0xd00000010 <error: Cannot access memory at address 0xd00000010>
        mtransid = {s = 0x7ff769ce5b80 "F", len = 1775131544}
        __func__ = "tcp_read_headers"
#11 0x0000561ac0bcf97f in tcp_read_req (con=0x7ff769ce5b80, bytes_read=0x7ffcc02a8914, read_flags=0x7ffcc02a8910) at core/tcp_read.c:1469
        bytes = -1
        total_bytes = 0
        resp = 1
        size = 249108103168
        req = 0x7ff769ce5ca8
        dst = {send_sock = 0x7ff769ce5b80, to = {s = {sa_family = 0, sa_data = "\000\000\002\000\000\000\001\000*\300:\000\000"}, sin = {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 2}, 
              sin_zero = "\001\000*\300:\000\000"}, sin6 = {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 2, sin6_addr = {__in6_u = {
                  __u6_addr8 = "\001\000*\300:\000\000\000\004\000\352\300\032V\000", __u6_addr16 = {1, 49194, 58, 0, 4, 49386, 22042, 0}, __u6_addr32 = {3223977985, 58, 3236560900, 22042}}}, 
              sin6_scope_id = 0}, sas = {ss_family = 0, 
              __ss_padding = "\000\000\002\000\000\000\001\000*\300:\000\000\000\004\000\352\300\032V\000\000\000\000\000\000\000\000\000\000\030]\316i\367\177\000\000@\210*\300\332\020$_\020\000\000\000\020\000\000\000\024\000\000\000\000\000\000\000\001\000\000\000\001\000\000\000\n\000\000\000\032V\000\000\000\000\000\000\000\000\000\000\b\211*\300\374\177\000\000\b", '\000' <repeats 22 times>, 
              __ss_align = 140723532498832}}, id = 1, send_flags = {f = 0, blst_imask = 0}, proto = 112 'p', proto_pad0 = -121 '\207', proto_pad1 = -16342}
        c = 0 '\000'
        ret = 0
        __func__ = "tcp_read_req"
#12 0x0000561ac0bd501c in handle_io (fm=0x7ff7a7d839d8, events=1, idx=-1) at core/tcp_read.c:1780
        ret = 8
        n = 8
        read_flags = RD_CONN_FORCE_EOF
        con = 0x7ff769ce5b80
        s = 10
        resp = 94672905982120
        t = 0
        ee = 0x0
        __func__ = "handle_io"
#13 0x0000561ac0bc1738 in io_wait_loop_epoll (h=0x561ac0eab2a0 <io_w>, t=2, repeat=0) at core/io_wait.h:1070
--Type <RET> for more, q to quit, c to continue without paging--
        n = 1
        r = 0
        fm = 0x7ff7a7d839d8
        revents = 1
        __func__ = "io_wait_loop_epoll"
#14 0x0000561ac0bd7fc5 in tcp_receive_loop (unix_sock=58) at core/tcp_read.c:1976
        __func__ = "tcp_receive_loop"
#15 0x0000561ac0bb5b2a in tcp_init_children (woneinit=0x7ffcc02a8cbc) at core/tcp_main.c:5237
        r = 5
        i = 11
        reader_fd_1 = 58
        pid = 0
        si_desc = "tcp receiver (generic)\000\000Z\306\030\000\000\000\000\000\060\354\216\300\032V", '\000' <repeats 26 times>, "P\214*\300\374\177\000\000\rw\264\300\032V\000\000P\214*\300\374\177\000\000Y(\276\300\000\000\000\000\000\214*\300\374\177\000\000{\371\317\300\032V\000\000\060\354\216\300\032V\000\000\210IIh(\000\000"
        si = 0x0
        __func__ = "tcp_init_children"
#16 0x0000561ac0902189 in main_loop () at main.c:1851
        i = 12
        pid = 1623670
        si = 0x0
        si_desc = "udp receiver child=11 sock=192.168.2.109:9060\000\000\000 \215*\300\374\177\000\000\221\061=\246\000\000\270\000\200\215*\300\374\177\000\000\000\000\000\000\000\000\000\000\220\215*\300\374\177\000\000\260\a\257\247\367\177\000\000 \216*\300\374\177\000\000e\"9\246\367\177", '\000' <repeats 14 times>, "\001\000\000"
        nrprocs = 12
        woneinit = 1
        __func__ = "main_loop"
#17 0x0000561ac090cb77 in main (argc=8, argv=0x7ffcc02a9378) at main.c:3085
        cfg_stream = 0x561ac1d9f2a0
        c = -1
        r = 0
        tmp = 0x7ffcc02aad03 ""
        tmp_len = 0
        port = 0
        proto = 0
        ahost = 0x0
        aport = 0
        options = 0x561ac0d4a278 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
        ret = -1
        seed = 786272114
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 0
        n_lst = 0x0
        p = 0xc2 <error: Cannot access memory at address 0xc2>
        st = {st_dev = 22, st_ino = 1190, st_nlink = 2, st_mode = 16832, st_uid = 0, st_gid = 0, __pad0 = 0, st_rdev = 0, st_size = 60, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1653977759, 
            tv_nsec = 938156449}, st_mtim = {tv_sec = 1669682481, tv_nsec = 196757736}, st_ctim = {tv_sec = 1669682485, tv_nsec = 464818434}, __glibc_reserved = {0, 0, 0}}
        tbuf = "\020\037\\\250\367\177\000\000\300*)\250\001\000\000\000\377\377\377\377", '\000' <repeats 12 times>, "(\006]\250\367\177\000\000\350\271_\250\367\177\000\000\377\377\377\377", '\000' <repeats 12 times>, "@\345'\250\367\177\000\000\020$\\\250\367\177\000\000\350\304_\250\367\177\000\000\204\t]\250\367\177\000\000\060\004]\250\367\177\000\000X\202E\250\367\177\000\000h\271_\250\367\177\000\000`\260_\250\367\177\000\000\300\223*\300\374\177\000\000\200\301_\250\367\177\000\000\000\000\000\000\000\000\000\000#\366]\250\367\177\000\000\001", '\000' <repeats 23 times>, "(\006]\250\367\177\000\000Б*\300\374\177\000\000\003"...
        option_index = 12
        long_options = {{name = 0x561ac0d4c706 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x561ac0d47521 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x561ac0d4c70b "alias", 
            has_arg = 1, flag = 0x0, val = 1024}, {name = 0x561ac0d4c711 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x561ac0d4c717 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {
            name = 0x561ac0d4c720 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x561ac0d4c72a "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x561ac0d4c734 "loadmodule", 
--Type <RET> for more, q to quit, c to continue without paging--
            has_arg = 1, flag = 0x0, val = 1029}, {name = 0x561ac0d4c73f "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x561ac0d4c748 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {
            name = 0x561ac0d4c753 "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x561ac0d4c759 "cfg-print", has_arg = 0, flag = 0x0, val = 1033}, {name = 0x561ac0d4c763 "atexit", has_arg = 1, 
            flag = 0x0, val = 1034}, {name = 0x561ac0d4c76a "all-errors", has_arg = 0, flag = 0x0, val = 1035}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}}
        __func__ = "main"
(gdb) info locals
No locals.
(gdb) list
929	in ../sysdeps/x86_64/multiarch/memcmp-sse4.S
(gdb) 

I understand the next segfault happened as a consequence of the first one:

[root@lab002109-flip-server ~]$ gdb /usr/local/src/git/kamailio-master/src/kamailio /core.1623673 
GNU gdb (Debian 10.1-1.7) 10.1.90.20210103-git
Copyright (C) 2021 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
    <http://www.gnu.org/software/gdb/documentation/>.

For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from /usr/local/src/git/kamailio-master/src/kamailio...
BFD: warning: /core.1623673 is truncated: expected core file size >= 1052454912, found: 671449088

warning: Can't open file /dev/zero (deleted) during file-backed mapping note processing
[New LWP 1623673]
Cannot access memory at address 0x7ff7a85fc088
Cannot access memory at address 0x7ff7a85fc080
Failed to read a valid object file image from memory.
Core was generated by `/usr/local/src/git/kamailio-master/src/kamailio -m 991 -f /usr/local/etc/kamail'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  qm_insert_free (qm=<error reading variable: Cannot access memory at address 0x7ffcc02a6df8>, frag=<error reading variable: Cannot access memory at address 0x7ffcc02a6df0>) at core/mem/q_malloc.c:177
177		prev=FRAG_END(f)->prev_free;
(gdb) bt full
#0  qm_insert_free (qm=<error reading variable: Cannot access memory at address 0x7ffcc02a6df8>, frag=<error reading variable: Cannot access memory at address 0x7ffcc02a6df0>) at core/mem/q_malloc.c:177
        f = <error reading variable f (Cannot access memory at address 0x7ffcc02a6e18)>
        prev = <error reading variable prev (Cannot access memory at address 0x7ffcc02a6e08)>
        hash = <error reading variable hash (Cannot access memory at address 0x7ffcc02a6e14)>
Backtrace stopped: Cannot access memory at address 0x7ffcc02a6e28
(gdb) info locals
f = <error reading variable f (Cannot access memory at address 0x7ffcc02a6e18)>
prev = <error reading variable prev (Cannot access memory at address 0x7ffcc02a6e08)>
hash = <error reading variable hash (Cannot access memory at address 0x7ffcc02a6e14)>
(gdb) list
172		for(f=qm->free_hash[hash].head.u.nxt_free; f!=&(qm->free_hash[hash].head);
173				f=f->u.nxt_free){
174			if (frag->size <= f->size) break;
175		}
176		/*insert it here*/
177		prev=FRAG_END(f)->prev_free;
178		prev->u.nxt_free=frag;
179		FRAG_END(frag)->prev_free=prev;
180		frag->u.nxt_free=f;
181		FRAG_END(f)->prev_free=frag;
(gdb) 


Additional Information

version: kamailio 5.7.0-dev2 (x86_64/linux) 8531e4
flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLOCKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED
ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
id: 8531e4 
compiled on 09:03:45 Nov 29 2022 with gcc 10.2.1

[root@lab002109-flip-server ~]$ lsb_release -a
No LSB modules are available.
Distributor ID:	Debian
Description:	Debian GNU/Linux 11 (bullseye)
Release:	11
Codename:	bullseye

[root@lab002109-flip-server ~]$ uname -a
Linux lab002109-flip-server 5.10.0-14-amd64 #1 SMP Debian 5.10.113-1 (2022-04-29) x86_64 GNU/Linux
[root@lab002109-flip-server ~]$ 


Reply to this email directly, view it on GitHub, or unsubscribe.
You are receiving this because you are subscribed to this thread.Message ID: <kamailio/kamailio/issues/3316@github.com>