Hi,
you gave me an idea where to find the error. We modified the ul_publish.c source in order to achieve the following results:
- when client sends a REGISTER, an implicit PUBLISH is called so in each registration, publication should also happen
- when tcp socket is lost, we want that the contacts of the lost user get a presence NOTIFY with open=0 state to show the unregistered (lost) state
Here is the ul_publish.c source, maybe the bug is in it. But in our dev system, it works like a charm:
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <libxml/parser.h> #include <time.h>
#include "../../parser/parse_expires.h" #include "../../parser/msg_parser.h" #include "../../str.h" #include "../../dset.h" #include "../usrloc/usrloc.h" #include "../usrloc/ul_callback.h" #include "../../modules/tm/tm_load.h" #include "../pua/pua.h" #include "pua_usrloc.h"
#define BUF_LEN 256 #define REG_BY_PUBLISH
#ifdef REG_BY_PUBLISH
static char* device_buffer = NULL; static char* state_buffer = NULL; static char* content_buffer = NULL; static void replace_string(char** dst, const char* source, int len) { if (*dst) { free(*dst); *dst = NULL; }
if (source) { *dst = malloc(len+1); strncpy(*dst, source, len); (*dst)[len] = 0; } }
#endif
int pua_set_publish(struct sip_msg* msg , char* s1, char* s2) { #ifdef REG_BY_PUBLISH const char* first_delimiter = NULL; const char* second_delimiter = NULL; // Prepare place to store data replace_string(&device_buffer, NULL, 0); replace_string(&state_buffer, NULL, 0); replace_string(&content_buffer, NULL, 0);
if (s1) { first_delimiter = strchr(s1, ';'); if (first_delimiter) second_delimiter = strchr(first_delimiter+1, ';');
if (first_delimiter) { replace_string(&device_buffer, s1, first_delimiter - s1);
if (second_delimiter) { replace_string(&state_buffer, first_delimiter+1, second_delimiter - first_delimiter - 1); replace_string(&content_buffer, second_delimiter+1, strlen(s1) - (second_delimiter - s1 + 1)); } } } if (strlen(s1) && (!device_buffer || !state_buffer || !content_buffer)) { LM_WARN("Failed to parse PresenceInReg header\n"); return 1; }
if (device_buffer) LM_DBG("Device: %s\n", device_buffer); if (state_buffer) LM_DBG("State: %s\n", state_buffer); if (content_buffer) LM_DBG("Content: %s\n", content_buffer); #endif pua_ul_publish= 1; if(pua_ul_bmask!=0) setbflag(0, pua_ul_bflag); return 1; }
int pua_unset_publish(struct sip_msg* msg, unsigned int flags, void* param) { // LM_ERR("unset send publish\n"); pua_ul_publish= 0;
if(pua_ul_bmask!=0) resetbflag(0, pua_ul_bflag); return 1; }
/* for debug purpose only */ void print_publ(publ_info_t* p) { LM_DBG("publ:\n"); LM_DBG("uri= %.*s\n", p->pres_uri->len, p->pres_uri->s); LM_DBG("id= %.*s\n", p->id.len, p->id.s); LM_DBG("expires= %d\n", p->expires); }
str* build_pidf(ucontact_t* c #ifdef REG_BY_PUBLISH , int open, const char* tuple_id, const char* content #endif ) { xmlDocPtr doc = NULL; xmlNodePtr root_node = NULL; xmlNodePtr tuple_node = NULL; xmlNodePtr status_node = NULL; xmlNodePtr basic_node = NULL; #ifdef REG_BY_PUBLISH xmlNodePtr note_node = NULL; #endif str *body= NULL; str pres_uri= {NULL, 0}; char buf[BUF_LEN]; char* at= NULL;
if(c->expires< (int)time(NULL)) { LM_ERR("PUBLISH: found expired \n\n"); #ifdef REG_BY_PUBLISH open = 0; #else return NULL; #endif }
pres_uri.s = buf; if(pres_prefix.s) { memcpy(pres_uri.s, pres_prefix.s, pres_prefix.len); pres_uri.len+= pres_prefix.len; memcpy(pres_uri.s+ pres_uri.len, ":", 1); pres_uri.len+= 1; } if(pres_uri.len + c->aor->len+ 1 > BUF_LEN) { LM_ERR("buffer size overflown\n"); return NULL; }
memcpy(pres_uri.s+ pres_uri.len, c->aor->s, c->aor->len); pres_uri.len+= c->aor->len;
at = memchr(c->aor->s, '@', c->aor->len); if(!at) { if(pres_uri.len + 2 + default_domain.len > BUF_LEN) { LM_ERR("buffer size overflown\n"); return NULL; }
pres_uri.s[pres_uri.len++]= '@'; memcpy(pres_uri.s+ pres_uri.len, default_domain.s, default_domain.len); pres_uri.len+= default_domain.len; } pres_uri.s[pres_uri.len]= '\0';
/* create the Publish body */ doc = xmlNewDoc(BAD_CAST "1.0"); if(doc==0) { LM_ERR("Failed to create new xml\n"); return NULL; } root_node = xmlNewNode(NULL, BAD_CAST "presence"); if(root_node==0) { LM_ERR("Cannot obtain root node\n"); goto error; } xmlDocSetRootElement(doc, root_node);
xmlNewProp(root_node, BAD_CAST "xmlns", BAD_CAST "urn:ietf:params:xml:ns:pidf"); xmlNewProp(root_node, BAD_CAST "xmlns:dm", BAD_CAST "urn:ietf:params:xml:ns:pidf:data-model"); xmlNewProp(root_node, BAD_CAST "xmlns:rpid", BAD_CAST "urn:ietf:params:xml:ns:pidf:rpid" ); xmlNewProp(root_node, BAD_CAST "xmlns:c", BAD_CAST "urn:ietf:params:xml:ns:pidf:cipid"); xmlNewProp(root_node, BAD_CAST "entity", BAD_CAST pres_uri.s);
tuple_node =xmlNewChild(root_node, NULL, BAD_CAST "tuple", NULL) ;
#ifdef REG_BY_PUBLISH // Override default tuple id xmlNewProp(tuple_node, BAD_CAST "id", BAD_CAST tuple_id); #endif
if( tuple_node ==NULL) { LM_ERR("while adding child\n"); goto error; }
status_node = xmlNewChild(tuple_node, NULL, BAD_CAST "status", NULL) ; if( status_node ==NULL) { LM_ERR("while adding child\n"); goto error; }
#ifdef REG_BY_PUBLISH basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic", open ? (BAD_CAST "open") : (BAD_CAST "close")) ; #else basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic", BAD_CAST "open"); #endif
if( basic_node ==NULL) { LM_ERR("while adding child\n"); goto error; }
#ifdef REG_BY_PUBLISH /* LM_ERR("Content is %s \n", content); */
note_node = xmlNewChild(tuple_node, NULL, BAD_CAST "note", BAD_CAST content); if ( note_node == NULL) { LM_ERR("while adding child\n"); goto error; } #endif
body = (str*)pkg_malloc(sizeof(str)); if(body == NULL) { LM_ERR("while allocating memory\n"); return NULL; } memset(body, 0, sizeof(str));
xmlDocDumpFormatMemory(doc,(unsigned char**)(void*)&body->s,&body->len,1);
LM_DBG("new_body:\n%.*s\n",body->len, body->s);
/*free the document */ xmlFreeDoc(doc); xmlCleanupParser();
return body;
error: if(body) { if(body->s) xmlFree(body->s); pkg_free(body); } if(doc) xmlFreeDoc(doc); return NULL; }
void ul_publish(ucontact_t* c, int type, void* param) { str* body= NULL; str uri= {NULL, 0}; char* at= NULL; publ_info_t* publ= NULL; int size= 0; str content_type; int error;
content_type.s= "application/pidf+xml"; content_type.len= 20;
#ifndef REG_BY_PUBLISH if(pua_ul_publish==0 && pua_ul_bmask==0) { LM_INFO("should not send ul publish\n"); return; } #endif if(pua_ul_bmask!=0 && (c->cflags & pua_ul_bmask)==0) { LM_INFO("not marked for publish\n"); return; }
if(type & UL_CONTACT_DELETE) { LM_DBG("\nDELETE type\n"); } else { if(type & UL_CONTACT_INSERT) { LM_DBG("\nINSERT type\n"); } else { if(type & UL_CONTACT_UPDATE) { LM_DBG("\nUPDATE type\n"); } else { if(type & UL_CONTACT_EXPIRE) { LM_DBG("\nEXPIRE type\n"); } } } } #ifdef REG_BY_PUBLISH int online = type & UL_CONTACT_INSERT || type & UL_CONTACT_UPDATE; if (online && state_buffer) online = strcmp(state_buffer, "closed") != 0;
body= build_pidf(c, online, device_buffer ? device_buffer : "device", content_buffer ? content_buffer : ""); if(online && (body == NULL || body->s == NULL)) goto error; #else if(type & UL_CONTACT_INSERT) { body= build_pidf(c); if(body == NULL || body->s == NULL) goto error; } else body = NULL; #endif
uri.s = (char*)pkg_malloc(sizeof(char)*(c->aor->len+default_domain.len+6)); if(uri.s == NULL) goto error;
memcpy(uri.s, "sip:", 4); uri.len = 4; memcpy(uri.s+ uri.len, c->aor->s, c->aor->len); uri.len+= c->aor->len; at = memchr(c->aor->s, '@', c->aor->len); if(!at) { uri.s[uri.len++]= '@'; memcpy(uri.s+ uri.len, default_domain.s, default_domain.len); uri.len+= default_domain.len; } LM_DBG("uri= %.*s\n", uri.len, uri.s);
size= sizeof(publ_info_t)+ sizeof(str)+( uri.len +c->callid.len+ 12 + content_type.len)*sizeof(char);
if(body) size+= sizeof(str)+ body->len* sizeof(char);
publ= (publ_info_t*)pkg_malloc(size); if(publ== NULL) { LM_ERR("no more share memory\n"); goto error; } memset(publ, 0, size); size= sizeof(publ_info_t);
publ->pres_uri= (str*)((char*)publ + size); size+= sizeof(str); publ->pres_uri->s= (char*)publ+ size; memcpy(publ->pres_uri->s, uri.s, uri.len); publ->pres_uri->len= uri.len; size+= uri.len;
if(body) { publ->body= (str*)( (char*)publ + size); size+= sizeof(str);
publ->body->s= (char*)publ + size; memcpy(publ->body->s, body->s, body->len); publ->body->len= body->len; size+= body->len; } publ->id.s= (char*)publ+ size; memcpy(publ->id.s, "UL_PUBLISH.", 11); memcpy(publ->id.s+11, c->callid.s, c->callid.len); publ->id.len= 11+ c->callid.len; size+= publ->id.len;
publ->content_type.s= (char*)publ+ size; memcpy(publ->content_type.s, content_type.s, content_type.len); publ->content_type.len= content_type.len; size+= content_type.len;
if(type & UL_CONTACT_EXPIRE || type & UL_CONTACT_DELETE) publ->expires= 0; else #ifndef REG_BY_PUBLISH publ->expires= c->expires - (int)time(NULL); #else publ->expires = 900; #endif
if(type & UL_CONTACT_INSERT) publ->flag|= INSERT_TYPE; else publ->flag|= UPDATE_TYPE;
publ->source_flag|= UL_PUBLISH; publ->event|= PRESENCE_EVENT; publ->extra_headers= NULL; print_publ(publ); if((error=pua_send_publish(publ))< 0) { #ifdef REG_BY_PUBLISH LM_ERR("while sending publish\n"); #else LM_ERR("while sending publish for ul event %d\n", type); #endif if(type & UL_CONTACT_UPDATE && error == ERR_PUBLISH_NO_BODY) { /* This error can occur if Kamailio was restarted/stopped and for any reason couldn't store a pua * entry in 'pua' DB table. It can also occur if 'pua' table is cleaned externally while Kamailio * is stopped so cannot retrieve these entries from DB when restarting. * In these cases, when a refresh registration for that user creates an UPDATE action in pua_usrloc, * pua 'ul_publish()' would fail since the appropiate entry doesn't exist in pua hast table ("New * PUBLISH and no body found - invalid request"). * This code solves this problem by invoking an INSERT action if an UPDATE action failed due to the * above error. It will however generate a new presentity entry in the presence server (until the * previous one expires), but this is a minor issue. */ LM_ERR("UPDATE action generated a PUBLISH without body -> invoking INSERT action\n"); ul_publish(c, UL_CONTACT_INSERT, param); return; } }
pua_ul_publish= 0;
error:
if(publ) pkg_free(publ);
if(body) { if(body->s) xmlFree(body->s); pkg_free(body); }
if(uri.s) pkg_free(uri.s); pua_ul_publish= 0;
return;
}
Péter
From: Daniel-Constantin Mierla [mailto:miconda@gmail.com] Sent: Monday, April 27, 2015 11:41 PM To: Péter Barabás; Kamailio (SER) - Users Mailing List Subject: Re: [SR-Users] kamailio crash periodically in timer handler
Hello,
got dragged by some other tasks for a while...
Can you list here all your loaded modules? Also, are you having event routes in your config?
At this moment it looks like either a buffer overflow somewhere or a free of an shared memory element without detaching from timer. I will need more details to narrow it down.
Cheers, Daniel On 23/04/15 17:54, Péter Barabás wrote: Hi,
here it is:
$1 = {next = 0x0, prev = 0x7f64eb5b6bd0, expire = 1432560038, initial_timeout = 80, data = 0x7f64ebc3a888, f = 0x7f6510a2c165 <wait_handler>, flags = 513, slow_idx = 0}
Péter
From: sr-users [mailto:sr-users-bounces@lists.sip-router.org] On Behalf Of Daniel-Constantin Mierla Sent: Thursday, April 23, 2015 5:07 PM To: Kamailio (SER) - Users Mailing List Subject: Re: [SR-Users] kamailio crash periodically in timer handler
Hello,
can you get from gdb the output of:
p *tl
Cheers, Daniel On 23/04/15 17:00, Péter Barabás wrote: Hi,
we have a serious problem with Kamailio v4.2.3, it crashes after some days. We have checked the core dump and the gdb output is the following for bt and full backtrace:
#0 0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0, slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877 #1 0x0000000000602746 in timer_handler () at timer.c:953 #2 0x0000000000602bb9 in timer_main () at timer.c:992 #3 0x00000000004a833c in main_loop () at main.c:1700 #4 0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578
#0 0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0, slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877 tl = 0x7f64ebc3a908 ret = 0 #1 0x0000000000602746 in timer_handler () at timer.c:953 saved_ticks = 1432559962 run_slow_timer = 0 i = 865 __FUNCTION__ = "timer_handler" #2 0x0000000000602bb9 in timer_main () at timer.c:992 No locals. #3 0x00000000004a833c in main_loop () at main.c:1700 i = 4 pid = 0 si = 0x0 si_desc = "udp receiver child=3 sock=127.0.0.1:5060\000\375\362\250/\313\345\v\004\000\000\000\000\000\000\000\004\257\263=\000\000\000\000\200TA\000\000\000\000\000\060}i\226\377\177", '\000' <repeats 19 times>, "{i\226\377\177\000\000\376\366R\000\000\000\000\000\377\377\377\377\377\177\000\000@\350\250\000\000\000\000" nrprocs = 4 __FUNCTION__ = "main_loop" #4 0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578 cfg_stream = 0x1b94010 c = -1 r = 0 tmp = 0x7fff96699b17 "" tmp_len = 0 port = 1 proto = 32767 options = 0x703e08 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:" ret = -1 seed = 2591880768 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x0 p = 0x7fff96697c50 "" __FUNCTION__ = "main"
In the logs it can be seen:
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: CRITICAL: <core> [pass_fd.c:293]: receive_fd(): EOF on 21 Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:784]: handle_sigs(): child process 4623 exited by a signal 11 Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:787]: handle_sigs(): core was generated Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [main.c:799]: handle_sigs(): terminating due to SIGCHLD Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4631]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4628]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4634]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4615]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4617]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4626]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4625]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4624]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4632]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4622]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4621]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4620]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4619]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4618]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4616]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4614]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4629]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4627]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4633]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4630]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64eca19950/0x7f64eca19988) - ignore Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64ed587f20/0x7f64ed587f58) - ignore
Kamailio runs on Ubuntu trusty. The interesting thing that we run Kamailio 4.2.3 in an other environment (on Debian Squeeze) and it works perfect, no crashes. The main difference is that in Debian environment we have less number of users and the firewall is not so strength. But the configuration and the source is the same for both.
Please help us to solve this problem. Thanks.
Péter
_______________________________________________
SIP Express Router (SER) and Kamailio (OpenSER) - sr-users mailing list
sr-users@lists.sip-router.orgmailto:sr-users@lists.sip-router.org
http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-users
--
Daniel-Constantin Mierla
http://twitter.com/#!/micondahttp://twitter.com/#%21/miconda - http://www.linkedin.com/in/miconda
Kamailio World Conference, May 27-29, 2015
Berlin, Germany - http://www.kamailioworld.com
--
Daniel-Constantin Mierla
http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda
Kamailio World Conference, May 27-29, 2015
Berlin, Germany - http://www.kamailioworld.com