Hi,
you gave me an idea where to find the error.
We modified the ul_publish.c source in order to achieve the following results:
- when client sends a REGISTER, an implicit PUBLISH is called so in each
registration, publication should also happen
- when tcp socket is lost, we want that the contacts of the lost user get a
presence NOTIFY with open=0 state to show the unregistered (lost) state
Here is the ul_publish.c source, maybe the bug is in it. But in our dev system, it works
like a charm:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/parser.h>
#include <time.h>
#include "../../parser/parse_expires.h"
#include "../../parser/msg_parser.h"
#include "../../str.h"
#include "../../dset.h"
#include "../usrloc/usrloc.h"
#include "../usrloc/ul_callback.h"
#include "../../modules/tm/tm_load.h"
#include "../pua/pua.h"
#include "pua_usrloc.h"
#define BUF_LEN 256
#define REG_BY_PUBLISH
#ifdef REG_BY_PUBLISH
static char* device_buffer = NULL;
static char* state_buffer = NULL;
static char* content_buffer = NULL;
static void replace_string(char** dst, const char* source, int len)
{
if (*dst)
{
free(*dst);
*dst = NULL;
}
if (source)
{
*dst = malloc(len+1);
strncpy(*dst, source, len);
(*dst)[len] = 0;
}
}
#endif
int pua_set_publish(struct sip_msg* msg , char* s1, char* s2)
{
#ifdef REG_BY_PUBLISH
const char* first_delimiter = NULL;
const char* second_delimiter = NULL;
// Prepare place to store data
replace_string(&device_buffer, NULL, 0);
replace_string(&state_buffer, NULL, 0);
replace_string(&content_buffer, NULL, 0);
if (s1)
{
first_delimiter = strchr(s1, ';');
if (first_delimiter)
second_delimiter =
strchr(first_delimiter+1, ';');
if (first_delimiter)
{
replace_string(&device_buffer, s1,
first_delimiter - s1);
if (second_delimiter)
{
replace_string(&state_buffer, first_delimiter+1, second_delimiter - first_delimiter -
1);
replace_string(&content_buffer, second_delimiter+1, strlen(s1) - (second_delimiter -
s1 + 1));
}
}
}
if (strlen(s1) && (!device_buffer || !state_buffer ||
!content_buffer))
{
LM_WARN("Failed to parse PresenceInReg
header\n");
return 1;
}
if (device_buffer)
LM_DBG("Device: %s\n", device_buffer);
if (state_buffer)
LM_DBG("State: %s\n", state_buffer);
if (content_buffer)
LM_DBG("Content: %s\n", content_buffer);
#endif
pua_ul_publish= 1;
if(pua_ul_bmask!=0)
setbflag(0, pua_ul_bflag);
return 1;
}
int pua_unset_publish(struct sip_msg* msg, unsigned int flags, void* param)
{
// LM_ERR("unset send publish\n");
pua_ul_publish= 0;
if(pua_ul_bmask!=0)
resetbflag(0, pua_ul_bflag);
return 1;
}
/* for debug purpose only */
void print_publ(publ_info_t* p)
{
LM_DBG("publ:\n");
LM_DBG("uri= %.*s\n", p->pres_uri->len,
p->pres_uri->s);
LM_DBG("id= %.*s\n", p->id.len, p->id.s);
LM_DBG("expires= %d\n", p->expires);
}
str* build_pidf(ucontact_t* c
#ifdef REG_BY_PUBLISH
, int open, const char* tuple_id, const char* content
#endif
)
{
xmlDocPtr doc = NULL;
xmlNodePtr root_node = NULL;
xmlNodePtr tuple_node = NULL;
xmlNodePtr status_node = NULL;
xmlNodePtr basic_node = NULL;
#ifdef REG_BY_PUBLISH
xmlNodePtr note_node = NULL;
#endif
str *body= NULL;
str pres_uri= {NULL, 0};
char buf[BUF_LEN];
char* at= NULL;
if(c->expires< (int)time(NULL))
{
LM_ERR("PUBLISH: found expired \n\n");
#ifdef REG_BY_PUBLISH
open = 0;
#else
return NULL;
#endif
}
pres_uri.s = buf;
if(pres_prefix.s)
{
memcpy(pres_uri.s, pres_prefix.s, pres_prefix.len);
pres_uri.len+= pres_prefix.len;
memcpy(pres_uri.s+ pres_uri.len, ":", 1);
pres_uri.len+= 1;
}
if(pres_uri.len + c->aor->len+ 1 > BUF_LEN)
{
LM_ERR("buffer size overflown\n");
return NULL;
}
memcpy(pres_uri.s+ pres_uri.len, c->aor->s, c->aor->len);
pres_uri.len+= c->aor->len;
at = memchr(c->aor->s, '@', c->aor->len);
if(!at)
{
if(pres_uri.len + 2 + default_domain.len > BUF_LEN)
{
LM_ERR("buffer size
overflown\n");
return NULL;
}
pres_uri.s[pres_uri.len++]= '@';
memcpy(pres_uri.s+ pres_uri.len, default_domain.s,
default_domain.len);
pres_uri.len+= default_domain.len;
}
pres_uri.s[pres_uri.len]= '\0';
/* create the Publish body */
doc = xmlNewDoc(BAD_CAST "1.0");
if(doc==0)
{
LM_ERR("Failed to create new xml\n");
return NULL;
}
root_node = xmlNewNode(NULL, BAD_CAST "presence");
if(root_node==0)
{
LM_ERR("Cannot obtain root node\n");
goto error;
}
xmlDocSetRootElement(doc, root_node);
xmlNewProp(root_node, BAD_CAST "xmlns",
BAD_CAST
"urn:ietf:params:xml:ns:pidf");
xmlNewProp(root_node, BAD_CAST "xmlns:dm",
BAD_CAST
"urn:ietf:params:xml:ns:pidf:data-model");
xmlNewProp(root_node, BAD_CAST "xmlns:rpid",
BAD_CAST
"urn:ietf:params:xml:ns:pidf:rpid" );
xmlNewProp(root_node, BAD_CAST "xmlns:c",
BAD_CAST
"urn:ietf:params:xml:ns:pidf:cipid");
xmlNewProp(root_node, BAD_CAST "entity", BAD_CAST pres_uri.s);
tuple_node =xmlNewChild(root_node, NULL, BAD_CAST "tuple", NULL)
;
#ifdef REG_BY_PUBLISH
// Override default tuple id
xmlNewProp(tuple_node, BAD_CAST "id", BAD_CAST tuple_id);
#endif
if( tuple_node ==NULL)
{
LM_ERR("while adding child\n");
goto error;
}
status_node = xmlNewChild(tuple_node, NULL, BAD_CAST "status",
NULL) ;
if( status_node ==NULL)
{
LM_ERR("while adding child\n");
goto error;
}
#ifdef REG_BY_PUBLISH
basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic",
open ? (BAD_CAST "open") : (BAD_CAST
"close")) ;
#else
basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic",
BAD_CAST "open");
#endif
if( basic_node ==NULL)
{
LM_ERR("while adding child\n");
goto error;
}
#ifdef REG_BY_PUBLISH
/* LM_ERR("Content is %s \n", content); */
note_node = xmlNewChild(tuple_node, NULL, BAD_CAST "note",
BAD_CAST content);
if ( note_node == NULL)
{
LM_ERR("while adding child\n");
goto error;
}
#endif
body = (str*)pkg_malloc(sizeof(str));
if(body == NULL)
{
LM_ERR("while allocating memory\n");
return NULL;
}
memset(body, 0, sizeof(str));
xmlDocDumpFormatMemory(doc,(unsigned
char**)(void*)&body->s,&body->len,1);
LM_DBG("new_body:\n%.*s\n",body->len, body->s);
/*free the document */
xmlFreeDoc(doc);
xmlCleanupParser();
return body;
error:
if(body)
{
if(body->s)
xmlFree(body->s);
pkg_free(body);
}
if(doc)
xmlFreeDoc(doc);
return NULL;
}
void ul_publish(ucontact_t* c, int type, void* param)
{
str* body= NULL;
str uri= {NULL, 0};
char* at= NULL;
publ_info_t* publ= NULL;
int size= 0;
str content_type;
int error;
content_type.s= "application/pidf+xml";
content_type.len= 20;
#ifndef REG_BY_PUBLISH
if(pua_ul_publish==0 && pua_ul_bmask==0)
{
LM_INFO("should not send ul publish\n");
return;
}
#endif
if(pua_ul_bmask!=0 && (c->cflags & pua_ul_bmask)==0)
{
LM_INFO("not marked for publish\n");
return;
}
if(type & UL_CONTACT_DELETE) {
LM_DBG("\nDELETE type\n");
} else {
if(type & UL_CONTACT_INSERT) {
LM_DBG("\nINSERT type\n");
} else {
if(type & UL_CONTACT_UPDATE) {
LM_DBG("\nUPDATE
type\n");
} else {
if(type &
UL_CONTACT_EXPIRE) {
LM_DBG("\nEXPIRE type\n");
}
}
}
}
#ifdef REG_BY_PUBLISH
int online = type & UL_CONTACT_INSERT || type &
UL_CONTACT_UPDATE;
if (online && state_buffer)
online = strcmp(state_buffer, "closed") != 0;
body= build_pidf(c, online, device_buffer ? device_buffer :
"device", content_buffer ? content_buffer : "");
if(online && (body == NULL || body->s == NULL))
goto error;
#else
if(type & UL_CONTACT_INSERT)
{
body= build_pidf(c);
if(body == NULL || body->s == NULL)
goto error;
}
else
body = NULL;
#endif
uri.s =
(char*)pkg_malloc(sizeof(char)*(c->aor->len+default_domain.len+6));
if(uri.s == NULL)
goto error;
memcpy(uri.s, "sip:", 4);
uri.len = 4;
memcpy(uri.s+ uri.len, c->aor->s, c->aor->len);
uri.len+= c->aor->len;
at = memchr(c->aor->s, '@', c->aor->len);
if(!at)
{
uri.s[uri.len++]= '@';
memcpy(uri.s+ uri.len, default_domain.s,
default_domain.len);
uri.len+= default_domain.len;
}
LM_DBG("uri= %.*s\n", uri.len, uri.s);
size= sizeof(publ_info_t)+ sizeof(str)+( uri.len
+c->callid.len+ 12 +
content_type.len)*sizeof(char);
if(body)
size+= sizeof(str)+ body->len* sizeof(char);
publ= (publ_info_t*)pkg_malloc(size);
if(publ== NULL)
{
LM_ERR("no more share memory\n");
goto error;
}
memset(publ, 0, size);
size= sizeof(publ_info_t);
publ->pres_uri= (str*)((char*)publ + size);
size+= sizeof(str);
publ->pres_uri->s= (char*)publ+ size;
memcpy(publ->pres_uri->s, uri.s, uri.len);
publ->pres_uri->len= uri.len;
size+= uri.len;
if(body)
{
publ->body= (str*)( (char*)publ + size);
size+= sizeof(str);
publ->body->s= (char*)publ + size;
memcpy(publ->body->s, body->s, body->len);
publ->body->len= body->len;
size+= body->len;
}
publ->id.s= (char*)publ+ size;
memcpy(publ->id.s, "UL_PUBLISH.", 11);
memcpy(publ->id.s+11, c->callid.s, c->callid.len);
publ->id.len= 11+ c->callid.len;
size+= publ->id.len;
publ->content_type.s= (char*)publ+ size;
memcpy(publ->content_type.s, content_type.s, content_type.len);
publ->content_type.len= content_type.len;
size+= content_type.len;
if(type & UL_CONTACT_EXPIRE || type & UL_CONTACT_DELETE)
publ->expires= 0;
else
#ifndef REG_BY_PUBLISH
publ->expires= c->expires - (int)time(NULL);
#else
publ->expires = 900;
#endif
if(type & UL_CONTACT_INSERT)
publ->flag|= INSERT_TYPE;
else
publ->flag|= UPDATE_TYPE;
publ->source_flag|= UL_PUBLISH;
publ->event|= PRESENCE_EVENT;
publ->extra_headers= NULL;
print_publ(publ);
if((error=pua_send_publish(publ))< 0)
{
#ifdef REG_BY_PUBLISH
LM_ERR("while sending publish\n");
#else
LM_ERR("while sending publish for ul event %d\n",
type);
#endif
if(type & UL_CONTACT_UPDATE && error ==
ERR_PUBLISH_NO_BODY) {
/* This error can occur if Kamailio was
restarted/stopped and for any reason couldn't store a pua
* entry in 'pua' DB table. It can
also occur if 'pua' table is cleaned externally while Kamailio
* is stopped so cannot retrieve these
entries from DB when restarting.
* In these cases, when a refresh
registration for that user creates an UPDATE action in pua_usrloc,
* pua 'ul_publish()' would fail
since the appropiate entry doesn't exist in pua hast table ("New
* PUBLISH and no body found - invalid
request").
* This code solves this problem by invoking
an INSERT action if an UPDATE action failed due to the
* above error. It will however generate a
new presentity entry in the presence server (until the
* previous one expires), but this is a
minor issue. */
LM_ERR("UPDATE action generated a
PUBLISH without body -> invoking INSERT action\n");
ul_publish(c, UL_CONTACT_INSERT, param);
return;
}
}
pua_ul_publish= 0;
error:
if(publ)
pkg_free(publ);
if(body)
{
if(body->s)
xmlFree(body->s);
pkg_free(body);
}
if(uri.s)
pkg_free(uri.s);
pua_ul_publish= 0;
return;
}
Péter
From: Daniel-Constantin Mierla [mailto:miconda@gmail.com]
Sent: Monday, April 27, 2015 11:41 PM
To: Péter Barabás; Kamailio (SER) - Users Mailing List
Subject: Re: [SR-Users] kamailio crash periodically in timer handler
Hello,
got dragged by some other tasks for a while...
Can you list here all your loaded modules? Also, are you having event routes in your
config?
At this moment it looks like either a buffer overflow somewhere or a free of an shared
memory element without detaching from timer. I will need more details to narrow it down.
Cheers,
Daniel
On 23/04/15 17:54, Péter Barabás wrote:
Hi,
here it is:
$1 = {next = 0x0, prev = 0x7f64eb5b6bd0, expire = 1432560038, initial_timeout = 80, data =
0x7f64ebc3a888, f = 0x7f6510a2c165 <wait_handler>, flags = 513, slow_idx = 0}
Péter
From: sr-users [mailto:sr-users-bounces@lists.sip-router.org] On Behalf Of
Daniel-Constantin Mierla
Sent: Thursday, April 23, 2015 5:07 PM
To: Kamailio (SER) - Users Mailing List
Subject: Re: [SR-Users] kamailio crash periodically in timer handler
Hello,
can you get from gdb the output of:
p *tl
Cheers,
Daniel
On 23/04/15 17:00, Péter Barabás wrote:
Hi,
we have a serious problem with Kamailio v4.2.3, it crashes after some days. We have
checked the core dump and the gdb output is the following for bt and full backtrace:
#0 0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0,
slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877
#1 0x0000000000602746 in timer_handler () at timer.c:953
#2 0x0000000000602bb9 in timer_main () at timer.c:992
#3 0x00000000004a833c in main_loop () at main.c:1700
#4 0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578
#0 0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0,
slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877
tl = 0x7f64ebc3a908
ret = 0
#1 0x0000000000602746 in timer_handler () at timer.c:953
saved_ticks = 1432559962
run_slow_timer = 0
i = 865
__FUNCTION__ = "timer_handler"
#2 0x0000000000602bb9 in timer_main () at timer.c:992
No locals.
#3 0x00000000004a833c in main_loop () at main.c:1700
i = 4
pid = 0
si = 0x0
si_desc = "udp receiver child=3
sock=127.0.0.1:5060\000\375\362\250/\313\345\v\004\000\000\000\000\000\000\000\004\257\263=\000\000\000\000\200TA\000\000\000\000\000\060}i\226\377\177",
'\000' <repeats 19 times>,
"{i\226\377\177\000\000\376\366R\000\000\000\000\000\377\377\377\377\377\177\000\000@\350\250\000\000\000\000"
nrprocs = 4
__FUNCTION__ = "main_loop"
#4 0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578
cfg_stream = 0x1b94010
c = -1
r = 0
tmp = 0x7fff96699b17 ""
tmp_len = 0
port = 1
proto = 32767
options = 0x703e08
":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:"
ret = -1
seed = 2591880768
rfd = 4
debug_save = 0
debug_flag = 0
dont_fork_cnt = 0
n_lst = 0x0
p = 0x7fff96697c50 ""
__FUNCTION__ = "main"
In the logs it can be seen:
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: CRITICAL: <core>
[pass_fd.c:293]: receive_fd(): EOF on 21
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:784]:
handle_sigs(): child process 4623 exited by a signal 11
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:787]:
handle_sigs(): core was generated
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [main.c:799]:
handle_sigs(): terminating due to SIGCHLD
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4631]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4628]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4634]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4615]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4617]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4626]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4625]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4624]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4632]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4622]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4621]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4620]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4619]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4618]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4616]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4614]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4629]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4627]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4633]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4630]: INFO: <core> [main.c:850]:
sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core>
[mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64eca19950/0x7f64eca19988) -
ignore
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core>
[mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64ed587f20/0x7f64ed587f58) -
ignore
Kamailio runs on Ubuntu trusty. The interesting thing that we run Kamailio 4.2.3 in an
other environment (on Debian Squeeze) and it works perfect, no crashes. The main
difference is that in Debian environment we have less number of users and the firewall is
not so strength. But the configuration and the source is the same for both.
Please help us to solve this problem. Thanks.
Péter
_______________________________________________
SIP Express Router (SER) and Kamailio (OpenSER) - sr-users mailing list
sr-users@lists.sip-router.org<mailto:sr-users@lists.sip-router.org>
http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-users
--
Daniel-Constantin Mierla
http://twitter.com/#!/miconda<http://twitter.com/#%21/miconda> -
http://www.linkedin.com/in/miconda
Kamailio World Conference, May 27-29, 2015
Berlin, Germany -
http://www.kamailioworld.com
--
Daniel-Constantin Mierla
http://twitter.com/#!/miconda -
http://www.linkedin.com/in/miconda
Kamailio World Conference, May 27-29, 2015
Berlin, Germany -
http://www.kamailioworld.com