Update after I got access to a system exposing the issue.
The kamailio timer process was stuck at 100% CPU. Because it was blocked, it was no longer releasing/removing active/terminated transactions, which caused the memory to fill up. The top output:
```
  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
13582 root      20   0 2274068  18012  12856 R  99.7  0.4   6179:53 kamailio
```
The `kamctl ps` output:
```
{
  IDX: 14
  PID: 13582
  DSC: timer
}
```
Attaching with gdb revealed the following backtrace:
```
#0 0x00007fec0b6ea7d8 in yaSSL::States::What (this=this@entry=0x55b99e2fbf70) at ./extra/yassl/src/yassl_int.cpp:208
#1 0x00007fec0b6eab31 in yaSSL::SSL::GetError (this=this@entry=0x55b99e2fb600) at ./extra/yassl/src/yassl_int.cpp:1520
#2 0x00007fec0b6f89fb in yaSSL::receiveData (ssl=..., data=..., peek=peek@entry=false) at ./extra/yassl/src/handshake.cpp:1047
#3 0x00007fec0b6de65d in yaSSL::yaSSL_read (ssl=ssl@entry=0x55b99e2fb600, buffer=buffer@entry=0x55b99e2f91a0, sz=sz@entry=4) at ./extra/yassl/src/ssl.cpp:363
#4 0x00007fec0b6c6797 in vio_ssl_read (vio=0x55b99e2d83b0, buf=0x55b99e2f91a0 "@", size=<optimized out>) at ./vio/viossl.c:186
#5 0x00007fec0b69fbae in net_read_raw_loop (count=4, net=0x7fec0c0378f0) at ./sql/net_serv.cc:672
#6 net_read_packet_header (net=0x7fec0c0378f0) at ./sql/net_serv.cc:762
#7 net_read_packet (net=0x7fec0c0378f0, complen=0x7ffe188c6ff0) at ./sql/net_serv.cc:822
#8 0x00007fec0b6a0c2c in my_net_read (net=net@entry=0x7fec0c0378f0) at ./sql/net_serv.cc:899
#9 0x00007fec0b694edc in cli_safe_read_with_ok (mysql=mysql@entry=0x7fec0c0378f0, parse_ok=parse_ok@entry=0 '\000', is_data_packet=is_data_packet@entry=0x0) at ./sql-common/client.c:1061
#10 0x00007fec0b69518f in cli_safe_read (mysql=mysql@entry=0x7fec0c0378f0, is_data_packet=is_data_packet@entry=0x0) at ./sql-common/client.c:1194
#11 0x00007fec0b695fa8 in cli_read_query_result (mysql=0x7fec0c0378f0) at ./sql-common/client.c:5245
#12 0x00007fec0b697d61 in mysql_real_query (mysql=0x7fec0c0378f0, query=<optimized out>, length=<optimized out>) at ./sql-common/client.c:5341
#13 0x00007fec0bc9a2ae in db_mysql_submit_query (_h=0x7fec0c1a4808, _s=0x7fec0b45a2d0 <sql_str>) at km_dbase.c:116
#14 0x00007fec0b249f6d in db_do_submit_query (_h=0x7fec0c1a4808, _query=0x7fec0b45a2d0 <sql_str>, submit_query=0x7fec0bc99b36 <db_mysql_submit_query>) at db_query.c:53
#15 0x00007fec0b24d10d in db_do_delete (_h=0x7fec0c1a4808, _k=0x7ffe188c7310, _o=0x0, _v=0x7ffe188c7330, _n=2, val2str=0x7fec0bc98136 <db_mysql_val2str>, submit_query=0x7fec0bc99b36 <db_mysql_submit_query>) at db_query.c:291
#16 0x00007fec0bc9ed64 in db_mysql_delete (_h=0x7fec0c1a4808, _k=0x7ffe188c7310, _o=0x0, _v=0x7ffe188c7330, _n=2) at km_dbase.c:515
#17 0x00007fec09669714 in remove_dialog_from_db (cell=0x7feb84c98a98) at dlg_db_handler.c:714
#18 0x00007fec09645375 in destroy_dlg (dlg=0x7feb84c98a98) at dlg_hash.c:371
#19 0x00007fec0964cca0 in dlg_unref_helper (dlg=0x7feb84c98a98, cnt=2, fname=0x7fec096dd53f "dlg_handlers.c", fline=410) at dlg_hash.c:961
#20 0x00007fec09671fb5 in dlg_ontdestroy (t=0x7feb84dbf640, type=131072, param=0x7ffe188c7620) at dlg_handlers.c:410
#21 0x00007fec099dfb05 in run_trans_callbacks_internal (cb_lst=0x7feb84dbf6b8, type=131072, trans=0x7feb84dbf640, params=0x7ffe188c7620) at t_hooks.c:260
#22 0x00007fec099dfc2c in run_trans_callbacks (type=131072, trans=0x7feb84dbf640, req=0x0, rpl=0x0, code=0) at t_hooks.c:287
---Type <return> to continue, or q <return> to quit---
#23 0x00007fec09915f45 in free_cell_helper (dead_cell=0x7feb84dbf640, silent=0, fname=0x7fec09a28882 "timer.c", fline=689) at h_table.c:166
#24 0x00007fec099cdbf6 in wait_handler (ti=1667374146, wait_tl=0x7feb84dbf6c8, data=0x7feb84dbf640) at timer.c:689
#25 0x000055b99c9f02c5 in timer_list_expire (t=1667374146, h=0x7feb8461a050, slow_l=0x7feb8461d988, slow_mark=1904) at core/timer.c:874
#26 0x000055b99c9f076d in timer_handler () at core/timer.c:939
#27 0x000055b99c9f0c36 in timer_main () at core/timer.c:978
#28 0x000055b99c942322 in main_loop () at main.c:1727
#29 0x000055b99c949a1b in main (argc=7, argv=0x7ffe188c7ca8) at main.c:2696
```
This shows that the process is executing code inside libmysqlclient, related to the tcp/tls connection. I noticed in the config that the tls module is used and the libssl version is 1.1, so this can be a side effect of the issue related to the new locking API in libssl/libcrypto.
The solutions to try would be either to compile against libssl 1.0.x or use the pre-loaded library workaround:
* https://github.com/kamailio/kamailio/tree/master/src/modules/tls/utils/openssl_mutex_shared
Run it with one of these two and let's see if it works fine or the issue appears again.