Hello, we have openser 1.3.3 running in production (current rev.: 4943). Three times in 50 days we have had to restart openser to correct a pkg memory problem. After some time of logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser eventually runs out of pkg memory and refuses all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). And this is much more than in our production environment, with just 600 subscribers and about 2000 calls a day.
The frequency with which the problem happens is increasing with the number of subscribers, so we are performing periodic restarts of openser (actually, what we do is switch over to the standby server). We have already recompiled openser with the pkg memory pool size set to 4MB so that this does not have to be done as frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that is not being properly handled by our openser.cfg. So my question is: would it be possible for an overlooked detail in openser.cfg to cause a pkg memory problem?
In case someone could take a look at it, here's our cfg file:
####### Global Parameters #########

# Logging: debug level 0, no logging to stderr, syslog facility LOCAL0.
debug=0
log_stderror=no
log_facility=LOG_LOCAL0

# Process model.
fork=yes
children=4

/* uncomment the following lines to enable debugging */
#debug=6
#fork=no
#log_stderror=yes

/* uncomment the next line to disable TCP (default on) */
disable_tcp=yes

/* uncomment the next line to enable the auto temporary blacklisting of
   not available destinations (default disabled) */
#disable_dns_blacklist=no

/* uncomment the next line to enable IPv6 lookup after IPv4 dns
   lookup failures (default disabled) */
#dns_try_ipv6=yes

/* uncomment the next line to disable the auto discovery of local aliases
   based on revers DNS on IPs (default on) */
#auto_aliases=no

/* uncomment the following lines to enable TLS support (default off) */
#disable_tls = no
#listen = tls:your_IP:5061
#tls_verify_server = 1
#tls_verify_client = 1
#tls_require_client_certificate = 0
#tls_method = TLSv1
#tls_certificate = "/usr/local/etc/openser/tls/user/user-cert.pem"
#tls_private_key = "/usr/local/etc/openser/tls/user/user-privkey.pem"
#tls_ca_list = "/usr/local/etc/openser/tls/user/user-calist.pem"

port=5060

/* uncomment and configure the following line if you want openser to
   bind on a specific interface/port/proto (default bind on all available) */
#listen=udp:202.173.5.181:5060
####### Modules Section ########

# Set module path. Must be a live statement: every loadmodule below uses a
# bare file name.
mpath="/usr/local/lib/openser/modules/"

# Database driver (MySQL); required by every module below that is given a db_url.
loadmodule "mysql.so"

# Core SIP processing modules.
loadmodule "sl.so"
loadmodule "tm.so"
loadmodule "rr.so"
loadmodule "maxfwd.so"
loadmodule "usrloc.so"
loadmodule "registrar.so"
loadmodule "textops.so"
loadmodule "mi_fifo.so"
loadmodule "uri_db.so"
loadmodule "uri.so"
loadmodule "xlog.so"
loadmodule "acc.so"
loadmodule "carrierroute.so"
loadmodule "nathelper.so"
loadmodule "dialog.so"
loadmodule "snmpstats.so"

/* MySQL based authentication support
   NOTE: a DB (like mysql) module must be also loaded */
loadmodule "auth.so"
loadmodule "auth_db.so"
loadmodule "lcr.so"

/* aliases support
   NOTE: a DB (like mysql) module must be also loaded */
loadmodule "alias_db.so"

/* multi-domain support
   NOTE: a DB (like mysql) module must be also loaded
   NOTE: be sure and enable multi-domain support in all used modules
         (see "multi-module params" section ) */
loadmodule "domain.so"

/* uncomment the next two lines for presence server support
   NOTE: a DB (like mysql) module must be also loaded */
#loadmodule "presence.so"
#loadmodule "presence_xml.so"

# uac provides uac_replace_from() and avpops provides avp_check(); both are
# called from the routing logic, so these two lines must not be swallowed by
# the commented-out presence lines above.
loadmodule "uac.so"
loadmodule "avpops.so"

# ----------------- setting module-specific parameters ---------------
# ----- mi_fifo params -----
modparam("mi_fifo", "fifo_name", "/tmp/openser_fifo")

# ----- rr params -----
# add value to ;lr param to cope with most of the UAs
modparam("rr", "enable_full_lr", 1)
# append the from tag to the RR. The acc comment below asks for this when
# direction detection is enabled.
# NOTE(review): the uac module documentation also asks for append_fromtag
# when From is rewritten (uac_replace_from() is used in the main route) - confirm.
modparam("rr", "append_fromtag", 1)

# ----- registrar params -----
modparam("registrar", "method_filtering", 1)
/* uncomment the next line to disable parallel forking via location */
# modparam("registrar", "append_branches", 0)
/* do not allow more than 10 contacts per AOR */
modparam("registrar", "max_contacts", 10)
modparam("registrar", "min_expires", 30)
modparam("registrar", "max_expires", 40)
modparam("registrar", "default_expires", 35)

# ----- uri_db params -----
/* by default we disable the DB support in the module as we do not need it
   in this configuration */
modparam("uri_db", "use_uri_table", 0)
modparam("uri_db", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("uri_db", "use_domain", 1)

# ----- acc params -----
/* what special events should be accounted ? */
modparam("acc", "early_media", 1)
modparam("acc", "report_ack", 1)
modparam("acc", "report_cancels", 1)
/* by default we do not adjust the direction of the sequential requests.
   if you enable this parameter, be sure to enable "append_fromtag"
   in "rr" module */
modparam("acc", "detect_direction", 0)
/* account triggers (flags) */
modparam("acc", "failed_transaction_flag", 3)
modparam("acc", "log_flag", 1)
modparam("acc", "log_missed_flag", 2)
/* DB accounting, driven by the same flags as log accounting */
modparam("acc", "db_flag", 1)
modparam("acc", "db_missed_flag", 2)

# ----- usrloc params -----
#modparam("usrloc", "db_mode", 0)
/* DB persistency for location entries */
modparam("usrloc", "db_mode", 2)
modparam("usrloc", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("usrloc", "use_domain", 1)

# ----- auth_db params -----
/* DB based authentication */
modparam("auth_db", "calculate_ha1", yes)
modparam("auth_db", "password_column", "password")
modparam("auth_db", "db_url", "mysql://openser:openserrw@localhost/openser")
# Loads the rpid and subscriber_status columns into $avp(s:rpid) and
# $avp(s:blocked); the routing logic reads both AVPs after authentication.
modparam("auth_db", "load_credentials", "$avp(s:rpid)=rpid;$avp(s:blocked)=subscriber_status")

# ----- alias_db params -----
/* DB based aliases */
modparam("alias_db", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("alias_db", "use_domain", 0)

# ----- domain params -----
/* multi-domain detection support */
modparam("domain", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("domain", "db_mode", 1)   # Use caching

# ----- multi-module params -----
/* uncomment the following line if you want to enable multi-domain support
   in the modules (default off) */
#modparam("alias_db|auth_db|usrloc|uri_db", "use_domain", 1)

# ----- presence params -----
/* uncomment the following lines if you want to enable presence */
#modparam("presence|presence_xml", "db_url",
#	"mysql://openser:openserrw@localhost/openser")
#modparam("presence_xml", "force_active", 1)
#modparam("presence", "server_address", "sip:192.168.1.2:5060")
# ----- carrierroute params -----
# db_url must not contain whitespace; the pasted value had a stray space
# before "/openser".
modparam("carrierroute", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("carrierroute", "config_source", "db")
modparam("carrierroute", "use_domain", 1)

# ----- NatHelper -----
# From version 1.2 onward this parameter is required to avoid an error when
# the function "fix_nated_register();" is used.
modparam("nathelper|registrar", "received_avp", "$avp(i:42)")
modparam("nathelper", "rtpproxy_sock", "udp:127.0.0.1:22222")

# ----- LCR -----
modparam("lcr", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("lcr|tm", "fr_inv_timer_avp", "$avp(i:704)")
modparam("lcr", "gw_uri_avp", "$avp(i:709)")
modparam("^auth$|lcr", "rpid_avp", "$avp(i:302)")
modparam("lcr", "contact_avp", "$avp(i:711)")
modparam("lcr", "ruri_user_avp", "$avp(i:500)")
modparam("lcr", "dm_flag", 25)

# ----- Dialog ----
# Flag 4 is set on initial requests in the main route.
modparam("dialog", "dlg_flag", 4)

# ----- SnmpStat -----
modparam("snmpstats", "sipEntityType", "registrarServer")
modparam("snmpstats", "sipEntityType", "proxyServer")
modparam("snmpstats", "MsgQueueMinorThreshold", 2000)
modparam("snmpstats", "MsgQueueMajorThreshold", 5000)
modparam("snmpstats", "dlg_minor_threshold", 500)
modparam("snmpstats", "dlg_major_threshold", 750)
modparam("snmpstats", "snmpgetPath","/usr/bin/")
modparam("snmpstats", "snmpCommunity","public")
####### Routing Logic ########
# main request routing logic
# Main request route: sanity check, in-dialog handling, CANCEL handling,
# authentication, registration, then call routing.
route{
    if (!mf_process_maxfwd_header("10")) {
        sl_send_reply("483","Too Many Hops");
        exit;
    }

    ##nat
    # NOTE(review): the flattened paste reads "##nat route(2);". It is restored
    # here as a "##nat" label followed by a live call, because flag 5 is set
    # only inside route[2]. Confirm against the deployed file.
    route(2);

    if (has_totag()) {
        # sequential request within a dialog should
        # take the path determined by record-routing
        if (loose_route()) {
            if (is_method("BYE")) {
                setflag(1); # do accounting ...
                setflag(3); # ... even if the transaction fails
            }
            route(1);
        } else {
            /* uncomment the following lines if you want to enable presence */
            ##if (is_method("SUBSCRIBE") && $rd == "your.server.ip.address") {
            ##	# in-dialog subscribe requests
            ##	route(2);
            ##	exit;
            ##}
            if ( is_method("ACK") ) {
                if ( t_check_trans() ) {
                    # non loose-route, but stateful ACK; must be an ACK
                    # after a 487 or e.g. 404 from upstream server
                    #t_relay();
                    #exit;
                    route(1);
                } else {
                    # ACK without matching transaction ... ignore and discard
                    exit;
                }
            }
            sl_send_reply("404","Not here");
        }
        exit;
    }

    #initial requests
    setflag(4); #for dialog statistics

    # CANCEL processing
    if (is_method("CANCEL")) {
        if (t_check_trans())
            route(1);   # route[1] relays and then exits
        # No matching transaction: stop here. Without this exit the CANCEL
        # would fall through to the proxy_authorize()/proxy_challenge() block
        # below and be challenged.
        exit;
    }

    #t_check_trans();

    if (is_method("PUBLISH|SUBSCRIBE|REFER|OPTIONS|MESSAGE")) {
        sl_send_reply("405", "Method not allowed");
        exit;
    }

    # authenticate every non-REGISTER request that does not come from a gateway
    if (!(method=="REGISTER") && (!from_gw())) {
        if (!proxy_authorize("", "subscriber")) {
            proxy_challenge("", "0");
            exit;
        }
        # $avp(s:blocked) is filled from subscriber_status by the auth_db
        # "load_credentials" parameter.
        if (!check_from()) {
            sl_send_reply("403","Forbidden auth ID");
            exit;
        }else if (avp_check("$avp(s:blocked)", "eq/0")) {
            sl_send_reply("603","Subscriber disabled");
            exit;
        }else if (avp_check("$avp(s:blocked)", "eq/1")) {
            sl_send_reply("603","Subscriber with outgoing blocked");
            exit;
        }

        consume_credentials();
        # caller authenticated
    }

    # record routing
    if (!is_method("REGISTER|MESSAGE"))
        record_route();

    # account only INVITEs
    if (is_method("INVITE")) {
        setflag(1); # do accounting
    }

    if (is_method("REGISTER")) {
        # authenticate the REGISTER requests
        if (!proxy_authorize("", "subscriber")) {
            proxy_challenge("", "0");
            exit;
        }

        if (!check_to()) {
            sl_send_reply("403","Forbidden auth ID");
            exit;
        }else if (avp_check("$avp(s:blocked)", "eq/0")) {
            sl_send_reply("403","Subscriber disabled");
            exit;
        }

        if (!save("location"))
            sl_reply_error();

        exit;
    }

    if ($rU==NULL) {
        # request with no Username in RURI
        sl_send_reply("484","Address Incomplete");
        exit;
    }

    # apply DB based aliases (uncomment to enable)
    ##alias_db_lookup("dbaliases");

    # if the call came from a known gateway it is not authenticated and we
    # cannot use the function check_from()
    if (from_gw()) {
        route(4);
    }else if (!check_from()) {
        # if check_from() returns false the call is not from a subscriber
        route(4);
    } else {
        # it is a subscriber, route using flip domain
        xlog("L_INFO", "routing using carrierroute $rm to $ru\n");
        if (!cr_user_rewrite_uri("$fu", "flip")) {
            t_newtran();
            t_reply("404", "No Route");
            exit;
        }
        # replaces From by its default DID
        uac_replace_from("sip:$avp(s:rpid)@$fd");
    }

    # when routing via usrloc, log the missed calls also
    setflag(2);

    route(1);
}
# route[1]: relay the request statefully, then stop script processing.
route[1] {
    xlog("L_INFO", "ROUTE_1 $rm to $ru\n");

    # Strip a ";nat=yes" marker from the R-URI; flag 6 records that it was there.
    if (subst_uri('/(sip:.*);nat=yes/\1/')) {
        setflag(6);
    }

    # Flag 5 or flag 6 set: run the RTP proxy handling in route[3] first.
    if (isflagset(5)||isflagset(6)) {
        route(3);
    }

    if (!t_relay()) {
        sl_reply_error();
    }
    exit;
}
# route[2]: NAT fix-ups. REGISTER requests get fix_nated_register(), other
# requests that do not come from a gateway get fix_nated_contact(); flag 5 is
# then set unconditionally.
route[2] {
    xlog("L_INFO", "ROUTE_2 $rm to $ru\n");
    if (method=="REGISTER") {
        fix_nated_register();
    } else if (!from_gw()) {
        fix_nated_contact();
    }
    setflag(5);
}

# route[3]: RTP proxy handling; called from route[1] when flag 5 or 6 is set.
route[3] {
    xlog("L_INFO", "ROUTE_3 $rm to $ru\n");
    if (is_method("BYE|CANCEL")) {
        unforce_rtp_proxy();
    } else if (is_method("INVITE")) {
        xlog("L_INFO", "FORCE RTP w/ parameter.\n");
        force_rtp_proxy("r");
        t_on_failure("1");
    }

    # Append ";nat=yes" to the Contact when flag 5 is set.
    if (isflagset(5))
        search_append('Contact:.*sip:[^>[:cntrl:]]*', ';nat=yes');

    t_on_reply("1");
}
# route[4]: destination resolution for calls that are not routed through the
# "flip" carrierroute domain. Tries a DB alias plus usrloc lookup, then the
# blocked-DID check, then the "peering" carrierroute domain.
route[4] {
    xlog("L_INFO", "uri does exist $rm to $ru \n");

    if (alias_db_lookup("dbaliases")){
        if (!lookup("location")) {
            switch ($retcode) {
                case -1:
                    t_newtran();
                    t_reply("404", "Subscriber not online");
                    exit;
                case -2:
                    sl_send_reply("405", "Method Not Allowed");
                    exit;
            }
        }
    }else{
        # check if the DID is blocked: blocked DIDs are looked up as aliases
        # with a "(BLK)" prefix
        $rU = "(BLK)" + $rU;
        if (alias_db_lookup("dbaliases")){
            sl_send_reply("403", "DID blocked");
            exit;
        }else{
            # neither a valid DID nor a blocked DID: try to route it using
            # the peering domain
            # NOTE(review): $rU still carries the "(BLK)" prefix at this
            # point - confirm the peering routes expect or strip it.
            if (!cr_rewrite_uri("peering", "call_id")) {
                t_newtran();
                t_reply("404", "Peering Not Found");
                exit;
            }
        }
    }
}
# failure_route[1]: armed by t_on_failure("1") in route[3]; tears down the RTP
# proxy session when flag 6 or flag 5 is set.
failure_route[1] {
    xlog("L_INFO", "FAILURE $rm to $ru\n");
    if (isflagset(6)||isflagset(5)) {
        unforce_rtp_proxy();
    }
}
# onreply_route[1]: armed by t_on_reply("1") in route[3]. For INVITE replies it
# forces the RTP proxy on 183/2xx when flag 5 or 6 is set, appends ";nat=yes"
# to the Contact, and fixes the Contact of replies not coming from a gateway.
onreply_route[1] {
    xlog("L_INFO", "ONREPLY_1 - Status $rs from $si $rm .\n");

    if (is_method("INVITE")) {
        if ((isflagset(5)||isflagset(6)) && status=~"(183)|(2[0-9][0-9])") {
            force_rtp_proxy();
        }
        search_append('Contact:.*sip:[^>[:cntrl:]]*', ';nat=yes');

        # earlier variant of the condition below, kept for reference:
        #if (isflagset(6)) {
        if (!from_gw()){
            xlog("L_INFO", "ONREPLY_1 - ! from gw.\n");
            fix_nated_contact();
        }
        exit;
    }
}
Regards, takeshi
Hello,
On 09/23/08 10:31, mayamatakeshi wrote:
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem.
openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right?
After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc).
We can spot memory leaks even if the "out of memory" message is not printed. Just archive the logs (the most important part is the shutdown time) and make them available for download so they can be investigated.
There could be two reasons: - there is memory leak but happens in some cases that you don't reproduce in lab, but they are in the production environment - you get memory fragmentation
Let's see first the debug messages...
Cheers, Daniel
And this is much more than in our production environment, with just 600 subscribers and about 2000 calls a day.
The frequency the problem happens is increasing with the number of subscribers, so we are performing periodic restart of openser (actually, what we do is to switch over to the standby server). We already recompiled openser with pkg memory pool size set to 4MB so that this will not have to be done frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that might not be having been properly handled by openser.cfg. So my question is: would it be possible to an overlooked detail in openser.cfg to cause pkg memory problem?
In case someone could take a look at it, here's our cfg file:
[...]
On Tue, Sep 23, 2008 at 6:00 PM, Daniel-Constantin Mierla <miconda@gmail.com> wrote:
Hello,
On 09/23/08 10:31, mayamatakeshi wrote:
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem.
openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right?
Yes. We started with rev. 4444, then we moved to newer revisions each time the problem happened.
After some time logging messages like this:
/openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc).
We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated.
There could be two reasons:
- there is memory leak but happens in some cases that you don't reproduce
in lab, but they are in the production environment
- you get memory fragmentation
Let's see first the debug messages...
OK. I'll prepare our environment for this and get the logs with memory manager compiled for debug. Thank you.
And this is much more than in our production environment, with just 600
subscribers and about 2000 calls a day.
The frequency the problem happens is increasing with the number of subscribers, so we are performing periodic restart of openser (actually, what we do is to switch over to the standby server). We already recompiled openser with pkg memory pool size set to 4MB so that this will not have to be done frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that might not be having been properly handled by openser.cfg. So my question is: would it be possible to an overlooked detail in openser.cfg to cause pkg memory problem?
In case someone could take a look at it, here's our cfg file:
[...]
-- Daniel-Constantin Mierla http://www.asipto.com
On Tue, Sep 23, 2008 at 6:00 PM, Daniel-Constantin Mierla <miconda@gmail.com> wrote:
Hello,
On 09/23/08 10:31, mayamatakeshi wrote:
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem.
openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right?
After some time logging messages like this:
/openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc).
We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated.
There could be two reasons:
- there is memory leak but happens in some cases that you don't reproduce
in lab, but they are in the production environment
- you get memory fragmentation
Let's see first the debug messages...
Hello, here is the link for the openser.log and cfg files: http://www.yousendit.com/download/bVlEV0o4R3NoeWJIRGc9PQ
After compilation with debug flags for memory manager, I left openser running in production for 24 hours. Then, I moved all traffic to another host and waited for more than 30 minutes before stopping openser. In the openser.cfg, I set debug=2. If you need, I can run it again with a higher value (but I hope it doesn't have to be too high, due to overhead concerns).
regards, takeshi
And this is much more than in our production environment, with just 600
subscribers and about 2000 calls a day.
The frequency the problem happens is increasing with the number of subscribers, so we are performing periodic restart of openser (actually, what we do is to switch over to the standby server). We already recompiled openser with pkg memory pool size set to 4MB so that this will not have to be done frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that might not be having been properly handled by openser.cfg. So my question is: would it be possible to an overlooked detail in openser.cfg to cause pkg memory problem?
In case someone could take a look at it, here's our cfg file:
[...]
-- Daniel-Constantin Mierla http://www.asipto.com
On Fri, Sep 26, 2008 at 6:24 PM, mayamatakeshi <mayamatakeshi@gmail.com> wrote:
On Tue, Sep 23, 2008 at 6:00 PM, Daniel-Constantin Mierla < miconda@gmail.com> wrote:
Hello,
On 09/23/08 10:31, mayamatakeshi wrote:
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem.
openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right?
After some time logging messages like this:
/openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc).
We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated.
There could be two reasons:
- there is memory leak but happens in some cases that you don't reproduce
in lab, but they are in the production environment
- you get memory fragmentation
Let's see first the debug messages...
Hello, here are the link for openser.log and cfg files: http://www.yousendit.com/download/bVlEV0o4R3NoeWJIRGc9PQ
After compilation with debug flags for memory manager, I left openser running in production for 24 hours. Then, I moved all traffic to another host and waited for more than 30 minutes before stopping openser. In the openser.cfg, I set debug=2. If you need, I can run it again with a higher value (but I hope it doesn't have to be too high, due to overhead concerns).
Sorry, I forgot to tell one thing: the last revision that showed this problem was 4809, so we reverted back to that revision before performing the above.
regards, takeshi
And this is much more than in our production environment, with just 600
subscribers and about 2000 calls a day.
The frequency the problem happens is increasing with the number of subscribers, so we are performing periodic restart of openser (actually, what we do is to switch over to the standby server). We already recompiled openser with pkg memory pool size set to 4MB so that this will not have to be done frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that might not be having been properly handled by openser.cfg. So my question is: would it be possible to an overlooked detail in openser.cfg to cause pkg memory problem?
In case someone could take a look at it, here's our cfg file:
[...]
-- Daniel-Constantin Mierla http://www.asipto.com
Hello,
mayamatakeshi wrote:
On Fri, Sep 26, 2008 at 6:24 PM, mayamatakeshi <mayamatakeshi@gmail.com> wrote:
On Tue, Sep 23, 2008 at 6:00 PM, Daniel-Constantin Mierla <miconda@gmail.com <mailto:miconda@gmail.com>> wrote: Hello, On 09/23/08 10:31, mayamatakeshi wrote: Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem. openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right? After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests. We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated. There could be two reasons: - there is memory leak but happens in some cases that you don't reproduce in lab, but they are in the production environment - you get memory fragmentation Let's see first the debug messages... Hello, here are the link for openser.log and cfg files: http://www.yousendit.com/download/bVlEV0o4R3NoeWJIRGc9PQ After compilation with debug flags for memory manager, I left openser running in production for 24 hours. Then, I moved all traffic to another host and waited for more than 30 minutes before stopping openser. In the openser.cfg, I set debug=2. 
If you need, I can run it again with a higher value (but I hope it doesn't have to be too high, due to overhead concerns).
Sorry, I forgot to tell one thing: the last revision that showed this problem was 4809, so we reverted back to that revision before performing the above.
Am I to understand that you couldn't reproduce it with the latest svn version? So you had to go back to a previous version?
Cheers, Daniel
On Sun, Oct 5, 2008 at 5:28 PM, Daniel-Constantin Mierla <miconda@gmail.com> wrote:
Hello,
mayamatakeshi wrote:
On Fri, Sep 26, 2008 at 6:24 PM, mayamatakeshi <mayamatakeshi@gmail.com> wrote:
On Tue, Sep 23, 2008 at 6:00 PM, Daniel-Constantin Mierla <miconda@gmail.com> wrote:
Hello, On 09/23/08 10:31, mayamatakeshi wrote: Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem. openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right? After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests. We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated. There could be two reasons: - there is memory leak but happens in some cases that you don't reproduce in lab, but they are in the production environment - you get memory fragmentation Let's see first the debug messages...
Hello, here are the link for openser.log and cfg files: http://www.yousendit.com/download/bVlEV0o4R3NoeWJIRGc9PQ
After compilation with debug flags for memory manager, I left openser running in production for 24 hours. Then, I moved all traffic to another host and waited for more than 30 minutes before stopping openser. In the openser.cfg, I set debug=2. If you need, I can run it again with a higher value (but I hope it doesn't have to be too high, due to overhead concerns).
Sorry, I forgot to tell one thing: the last revision that showed this problem was 4809, so we reverted back to that revision before performing the above.
to understand that you couldn't reproduce with latest svn version? So you had to get a previous version?
Hi, no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
But here are some developments in my investigation: Up to now, I was trying to recreate the problem using virtual machines running the same OS (Fedora 5) as in production. It never happened there, even after 30 million calls. But we were eventually able to test openser 1.3 on a production machine with the same spec as the ones showing the problem, and we were able to trigger the pkg memory problem using a simple outgoing SIPp scenario. The problem always happens after we reach around 28.000 calls, and we confirmed that the number of calls needed to cause the problem grows linearly with the amount of pkg memory (after increasing the pkg memory pool by 4x, the problem started to happen only after around 128.000 calls). However, we also ran the same tests with kamailio 1.4 (rev. 5017) on that machine and could not recreate the problem after 1.5 million calls, so we are thinking of just upgrading to 1.4 once other scenarios show that everything else is working.
But I don't know why the problem cannot be recreated using the VMs: the only significant difference is that the productions machines have 4 NICs that are bound in 2 pairs (1 for private ip and another for public ip) while the VMs have just one NIC.
I hope upgrading to 1.4 will solve everything, however, since nobody is complaining about having openser stopping after 28.000 calls, I still believe we have some problem in the openser.cfg itself. I'll check it after we put kamailio 1.4 in production.
regards, takeshi
Hello,
On 10/06/08 13:22, mayamatakeshi wrote:
[...]
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem. openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right? After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests. We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated. There could be two reasons: - there is memory leak but happens in some cases that you don't reproduce in lab, but they are in the production environment - you get memory fragmentation Let's see first the debug messages... Hello, here are the link for openser.log and cfg files: http://www.yousendit.com/download/bVlEV0o4R3NoeWJIRGc9PQ After compilation with debug flags for memory manager, I left openser running in production for 24 hours. Then, I moved all traffic to another host and waited for more than 30 minutes before stopping openser. In the openser.cfg, I set debug=2. If you need, I can run it again with a higher value (but I hope it doesn't have to be too high, due to overhead concerns). 
Sorry, I forgot to mention one thing: the last revision that showed this problem was 4809, so we reverted back to that revision before performing the above. Am I to understand that you couldn't reproduce it with the latest svn version? So you had to get a previous version?
Hi, no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
OK, I understand now. I was looking at the logs and there seems to be a leak with db operations - something does not free a db result. I will go over the modules that you are using and try to spot any issue -- I will check the change log to see if anything happened recently regarding such an issue.
But here are some developments in my investigation: Up to now, I was trying to recreate the problem using virtual machines running the same OS (Fedora 5) as in production. It never happened there, even after 30 million calls. But we eventually were able to test openser 1.3 using a production machine with the same spec as the ones showing the problem, and we were able to generate the pkg memory problem using a simple outgoing SIPp scenario. The problem always happens after we reach around 28.000 calls, and we confirmed that the number of calls needed to cause the problem grows linearly with the amount of pkg memory (after increasing the pkg memory pool by a factor of 4, the problem started to happen only after around 128.000 calls). However, we also tried the same tests with kamailio 1.4 (rev. 5017) on that machine and we could not recreate the problem after 1.5 million calls, so we are thinking of just upgrading to 1.4 once other scenarios show everything else is working.
OK, 1.4 is recommended, it has lot of new features and many fixes.
But I don't know why the problem cannot be recreated using the VMs: the only significant difference is that the production machines have 4 NICs that are bonded in 2 pairs (one for the private IP and another for the public IP) while the VMs have just one NIC.
I see no relation with the NICs.
I hope upgrading to 1.4 will solve everything, however, since nobody is complaining about having openser stopping after 28.000 calls, I still believe we have some problem in the openser.cfg itself. I'll check it after we put kamailio 1.4 in production.
OK, I will dig in further, I might be a bit slow, however, these days.
Cheers, Daniel
I also hit the package OOM issue on a 1.3 server, just to confirm that this problem really exists. I can't say after how many calls ...
Regards, Ovidiu Sas
On Mon, Oct 6, 2008 at 11:08 AM, Daniel-Constantin Mierla miconda@gmail.com wrote:
Hello,
On 10/06/08 13:22, mayamatakeshi wrote:
[...]
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem. openser 1.3.3 was released 3 weeks ago, so I guess you were running previous version before, but it happened again since you upgraded to 1.3.3, right? After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests. We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). We can spot memory leaks even the "out of memory" message is not printed. Just archive the logs (the most important is the shut down time) and made them available for download so they can be investigated. There could be two reasons: - there is memory leak but happens in some cases that you don't reproduce in lab, but they are in the production environment - you get memory fragmentation Let's see first the debug messages... Hello, here are the link for openser.log and cfg files: http://www.yousendit.com/download/bVlEV0o4R3NoeWJIRGc9PQ After compilation with debug flags for memory manager, I left openser running in production for 24 hours. Then, I moved all traffic to another host and waited for more than 30 minutes before stopping openser. In the openser.cfg, I set debug=2. If you need, I can run it again with a higher value (but I hope it doesn't have to be too high, due to overhead concerns). 
Sorry, I forgot to mention one thing: the last revision that showed this problem was 4809, so we reverted back to that revision before performing the above. Am I to understand that you couldn't reproduce it with the latest svn version? So you had to get a previous version?
Hi, no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
OK, I understand now. I was looking at the logs and there seems to be a leak with db operations - something does not free a db result. I will go over the modules that you are using and try to spot any issue -- I will check the change log to see if anything happened recently regarding such an issue.
But here are some developments in my investigation: Up to now, I was trying to recreate the problem using virtual machines running the same OS (Fedora 5) as in production. It never happened there, even after 30 million calls. But we eventually were able to test openser 1.3 using a production machine with the same spec as the ones showing the problem, and we were able to generate the pkg memory problem using a simple outgoing SIPp scenario. The problem always happens after we reach around 28.000 calls, and we confirmed that the number of calls needed to cause the problem grows linearly with the amount of pkg memory (after increasing the pkg memory pool by a factor of 4, the problem started to happen only after around 128.000 calls). However, we also tried the same tests with kamailio 1.4 (rev. 5017) on that machine and we could not recreate the problem after 1.5 million calls, so we are thinking of just upgrading to 1.4 once other scenarios show everything else is working.
OK, 1.4 is recommended, it has lot of new features and many fixes.
But I don't know why the problem cannot be recreated using the VMs: the only significant difference is that the production machines have 4 NICs that are bonded in 2 pairs (one for the private IP and another for the public IP) while the VMs have just one NIC.
I see no relation with the NICs.
I hope upgrading to 1.4 will solve everything, however, since nobody is complaining about having openser stopping after 28.000 calls, I still believe we have some problem in the openser.cfg itself. I'll check it after we put kamailio 1.4 in production.
OK, I will dig in further, I might be a bit slow, however, these days.
Cheers, Daniel
-- Daniel-Constantin Mierla http://www.asipto.com
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
On Monday 06 October 2008, Daniel-Constantin Mierla wrote:
[..]
no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
OK, I understand now. I was looking at the logs and there seems to be a leak with db operations - something does not free a db result. I will go over the modules that you are using and try to spot any issue -- I will check the change log to see if anything happened recently regarding such an issue. [..]
Hi Daniel,
yes, I also had this impression. If I understand the log correctly, there are about 6400 database results that were not freed, which causes the out-of-memory condition. That is about 1/5 to 1/4 of the total calls that lead to this problem, not good. I checked the mysql driver functions in question, but I did not find anything yet. So I also suppose the problem is related to the modules used. Ovidiu, how does this module set match the modules you use?
Cheers,
Henning
In that particular deployment I have POSTGRES as a db. Here's the set of loaded modules: loadmodule "postgres.so" loadmodule "sl.so" loadmodule "tm.so" loadmodule "rr.so" loadmodule "maxfwd.so" loadmodule "uri.so" loadmodule "dispatcher.so" loadmodule "mi_fifo.so" loadmodule "xlog.so" loadmodule "textops.so" loadmodule "avpops.so" loadmodule "uac_redirect.so" loadmodule "acc.so" loadmodule "gflags.so" loadmodule "exec.so"
Regards, Ovidiu Sas
On Mon, Oct 6, 2008 at 1:09 PM, Henning Westerholt henning.westerholt@1und1.de wrote:
On Monday 06 October 2008, Daniel-Constantin Mierla wrote:
[..]
no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
OK, I understand now. I was looking at the logs and there seems to be a leak with db operations - something does not free a db result. I will go over the modules that you are using and try to spot any issue -- I will check the change log to see if anything happened recently regarding such an issue. [..]
Hi Daniel,
yes, I also had this impression. If I understand the log correctly, there are about 6400 database results that were not freed, which causes the out-of-memory condition. That is about 1/5 to 1/4 of the total calls that lead to this problem, not good. I checked the mysql driver functions in question, but I did not find anything yet. So I also suppose the problem is related to the modules used. Ovidiu, how does this module set match the modules you use?
Cheers,
Henning
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Maybe it is related to http://lists.opensips.org/pipermail/users/2008-September/000449.html
regards klaus
Ovidiu Sas schrieb:
In that particular deployment I have POSTGRES as a db. Here's the set of loaded modules: loadmodule "postgres.so" loadmodule "sl.so" loadmodule "tm.so" loadmodule "rr.so" loadmodule "maxfwd.so" loadmodule "uri.so" loadmodule "dispatcher.so" loadmodule "mi_fifo.so" loadmodule "xlog.so" loadmodule "textops.so" loadmodule "avpops.so" loadmodule "uac_redirect.so" loadmodule "acc.so" loadmodule "gflags.so" loadmodule "exec.so"
Regards, Ovidiu Sas
On Mon, Oct 6, 2008 at 1:09 PM, Henning Westerholt henning.westerholt@1und1.de wrote:
On Monday 06 October 2008, Daniel-Constantin Mierla wrote:
[..]
no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
OK, I understand now. I was looking at the logs and there seems to be a leak with db operations - something does not free a db result. I will go over the modules that you are using and try to spot any issue -- I will check the change log to see if anything happened recently regarding such an issue. [..]
Hi Daniel,
yes, I also had this impression. If I understand the log correctly, there are about 6400 database results that were not freed, which causes the out-of-memory condition. That is about 1/5 to 1/4 of the total calls that lead to this problem, not good. I checked the mysql driver functions in question, but I did not find anything yet. So I also suppose the problem is related to the modules used. Ovidiu, how does this module set match the modules you use?
Cheers,
Henning
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Hello,
this can be related to a system memory leak; in the logs provided by Takeshi, however, I found something related to a PKG leak, due to DB operations (the db result was not freed). Also, his config has mysql (see first message in this thread), so it should be driver independent.
Cheers, Daniel
On 10/07/08 08:34, Klaus Darilion wrote:
Maybe it is related to http://lists.opensips.org/pipermail/users/2008-September/000449.html
regards klaus
Ovidiu Sas schrieb:
In that particular deployment I have POSTGRES as a db. Here's the set of loaded modules: loadmodule "postgres.so" loadmodule "sl.so" loadmodule "tm.so" loadmodule "rr.so" loadmodule "maxfwd.so" loadmodule "uri.so" loadmodule "dispatcher.so" loadmodule "mi_fifo.so" loadmodule "xlog.so" loadmodule "textops.so" loadmodule "avpops.so" loadmodule "uac_redirect.so" loadmodule "acc.so" loadmodule "gflags.so" loadmodule "exec.so"
Regards, Ovidiu Sas
On Mon, Oct 6, 2008 at 1:09 PM, Henning Westerholt henning.westerholt@1und1.de wrote:
On Monday 06 October 2008, Daniel-Constantin Mierla wrote:
[..]
no, the reason for reversion is that the latest version running in production will not show the problem because we adopted preventive reset to minimize impact to customer calls. So I don't know yet if it shows this problem or not. So I collected the logs using a revision that I was sure could recreate the problem.
OK, I understand now. I was looking at the logs and there seems to be a leak with db operations - something does not free a db result. I will go over the modules that you are using and try to spot any issue -- I will check the change log to see if anything happened recently regarding such an issue. [..]
Hi Daniel,
yes, I also had this impression. If I understand the log correctly, there are about 6400 database results that were not freed, which causes the out-of-memory condition. That is about 1/5 to 1/4 of the total calls that lead to this problem, not good. I checked the mysql driver functions in question, but I did not find anything yet. So I also suppose the problem is related to the modules used. Ovidiu, how does this module set match the modules you use?
Cheers,
Henning
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Daniel-Constantin Mierla schrieb:
Hello,
this can be related to a system memory leak; in the logs provided by Takeshi, however, I found something related to a PKG leak, due to DB operations (the db result was not freed). Also, his config has mysql (see first message in this thread), so it should be driver independent.
ok,
cheers klaus
I noticed that you have loaded the snmpstats module in your configuration file. Are you polling SNMPStats with SNMP at all? If not, it is possible that its interprocess buffer is filling up. The solution in this case would be to either periodically poll the module, or disable the module if you aren't using it.
On Tue, 2008-09-23 at 16:31 +0900, mayamatakeshi wrote:
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem. After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). And this is much more than in our production environment, with just 600 subscribers and about 2000 calls a day.
The frequency the problem happens is increasing with the number of subscribers, so we are performing periodic restart of openser (actually, what we do is to switch over to the standby server). We already recompiled openser with pkg memory pool size set to 4MB so that this will not have to be done frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that might not be having been properly handled by openser.cfg. So my question is: would it be possible to an overlooked detail in openser.cfg to cause pkg memory problem?
In case someone could take a look at it, here's our cfg file:
####### Global Parameters #########
debug=0 log_stderror=no log_facility=LOG_LOCAL0
fork=yes children=4
/* uncomment the following lines to enable debugging */ #debug=6 #fork=no #log_stderror=yes
/* uncomment the next line to disable TCP (default on) */ disable_tcp=yes
/* uncomment the next line to enable the auto temporary blacklisting of not available destinations (default disabled) */ #disable_dns_blacklist=no
/* uncomment the next line to enable IPv6 lookup after IPv4 dns lookup failures (default disabled) */ #dns_try_ipv6=yes
/* uncomment the next line to disable the auto discovery of local aliases based on revers DNS on IPs (default on) */ #auto_aliases=no
/* uncomment the following lines to enable TLS support (default off) */ #disable_tls = no #listen = tls:your_IP:5061 #tls_verify_server = 1 #tls_verify_client = 1 #tls_require_client_certificate = 0 #tls_method = TLSv1 #tls_certificate = "/usr/local/etc/openser/tls/user/user-cert.pem" #tls_private_key = "/usr/local/etc/openser/tls/user/user-privkey.pem" #tls_ca_list = "/usr/local/etc/openser/tls/user/user-calist.pem"
port=5060
/* uncomment and configure the following line if you want openser to bind on a specific interface/port/proto (default bind on all available) */ #listen=udp:202.173.5.181:5060
####### Modules Section ########
#set module path mpath="/usr/local/lib/openser/modules/"
/* uncomment next line for MySQL DB support */ loadmodule "mysql.so" loadmodule "sl.so" loadmodule "tm.so" loadmodule "rr.so" loadmodule "maxfwd.so" loadmodule "usrloc.so" loadmodule "registrar.so" loadmodule "textops.so" loadmodule "mi_fifo.so" loadmodule "uri_db.so" loadmodule "uri.so" loadmodule "xlog.so" loadmodule "acc.so" loadmodule "carrierroute.so" loadmodule "nathelper.so" loadmodule "dialog.so" loadmodule "snmpstats.so" /* uncomment next lines for MySQL based authentication support NOTE: a DB (like mysql) module must be also loaded */ loadmodule "auth.so" loadmodule "auth_db.so" loadmodule "lcr.so" /* uncomment next line for aliases support NOTE: a DB (like mysql) module must be also loaded */ loadmodule "alias_db.so" /* uncomment next line for multi-domain support NOTE: a DB (like mysql) module must be also loaded NOTE: be sure and enable multi-domain support in all used modules (see "multi-module params" section ) */ loadmodule "domain.so" /* uncomment the next two lines for presence server support NOTE: a DB (like mysql) module must be also loaded */ #loadmodule "presence.so" #loadmodule "presence_xml.so" loadmodule "uac.so" loadmodule "avpops.so" # ----------------- setting module-specific parameters ---------------
# ----- mi_fifo params ----- modparam("mi_fifo", "fifo_name", "/tmp/openser_fifo")
# ----- rr params ----- # add value to ;lr param to cope with most of the UAs modparam("rr", "enable_full_lr", 1) # do not append from tag to the RR (no need for this script) modparam("rr", "append_fromtag", 1)
# ----- registrar params ----- modparam("registrar", "method_filtering", 1) /* uncomment the next line to disable parallel forking via location */ # modparam("registrar", "append_branches", 0) /* uncomment the next line not to allow more than 10 contacts per AOR */ modparam("registrar", "max_contacts", 10) modparam("registrar", "min_expires", 30) modparam("registrar", "max_expires", 40) modparam("registrar", "default_expires", 35) # ----- uri_db params ----- /* by default we disable the DB support in the module as we do not need it in this configuration */ modparam("uri_db", "use_uri_table", 0) modparam("uri_db", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("uri_db", "use_domain", 1)
# ----- acc params ----- /* what sepcial events should be accounted ? */ modparam("acc", "early_media", 1) modparam("acc", "report_ack", 1) modparam("acc", "report_cancels", 1) /* by default ww do not adjust the direct of the sequential requests. if you enable this parameter, be sure the enable "append_fromtag" in "rr" module */ modparam("acc", "detect_direction", 0) /* account triggers (flags) */ modparam("acc", "failed_transaction_flag", 3) modparam("acc", "log_flag", 1) modparam("acc", "log_missed_flag", 2) /* uncomment the following lines to enable DB accounting also */ modparam("acc", "db_flag", 1) modparam("acc", "db_missed_flag", 2)
# ----- usrloc params ----- #modparam("usrloc", "db_mode", 0) /* uncomment the following lines if you want to enable DB persistency for location entries */ modparam("usrloc", "db_mode", 2) modparam("usrloc", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("usrloc", "use_domain", 1)
# ----- auth_db params ----- /* uncomment the following lines if you want to enable the DB based authentication */ modparam("auth_db", "calculate_ha1", yes) modparam("auth_db", "password_column", "password") modparam("auth_db", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("auth_db", "load_credentials", "$avp(s:rpid)=rpid; $avp(s:blocked)=subscriber_status")
# ----- alias_db params ----- /* uncomment the following lines if you want to enable the DB based aliases */ modparam("alias_db", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("alias_db", "use_domain", 0)
# ----- domain params ----- /* uncomment the following lines to enable multi-domain detection support */ modparam("domain", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("domain", "db_mode", 1) # Use caching
# ----- multi-module params ----- /* uncomment the following line if you want to enable multi-domain support in the modules (dafault off) */ #modparam("alias_db|auth_db|usrloc|uri_db", "use_domain", 1)
# ----- presence params ----- /* uncomment the following lines if you want to enable presence */ #modparam("presence|presence_xml", "db_url", # "mysql://openser:openserrw@localhost/openser") #modparam("presence_xml", "force_active", 1) #modparam("presence", "server_address", "sip:192.168.1.2:5060")
# ----- carrieroute params ----- modparam("carrierroute", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("carrierroute", "config_source", "db") modparam("carrierroute", "use_domain", 1) # ----- NatHelper ----- #para versao a partir da versao 1.2 eh necessario esse paramtro para nao dar erro qdo usa a funcao "fix_nated_register();" modparam("nathelper|registrar", "received_avp", "$avp(i:42)") modparam("nathelper", "rtpproxy_sock", "udp:127.0.0.1:22222")
# ----- LCR ----- modparam("lcr", "db_url", "mysql://openser:openserrw@localhost/openser") modparam("lcr|tm", "fr_inv_timer_avp", "$avp(i:704)") modparam("lcr", "gw_uri_avp", "$avp(i:709)") modparam("^auth$|lcr", "rpid_avp", "$avp(i:302)") modparam("lcr", "contact_avp", "$avp(i:711)") modparam("lcr", "ruri_user_avp", "$avp(i:500)") modparam("lcr", "dm_flag", 25)
# ----- Dialog ---- modparam("dialog", "dlg_flag", 4)
# ----- SnmpStat ----- modparam("snmpstats", "sipEntityType", "registrarServer") modparam("snmpstats", "sipEntityType", "proxyServer") modparam("snmpstats", "MsgQueueMinorThreshold", 2000) modparam("snmpstats", "MsgQueueMajorThreshold", 5000) modparam("snmpstats", "dlg_minor_threshold", 500) modparam("snmpstats", "dlg_major_threshold", 750) modparam("snmpstats", "snmpgetPath","/usr/bin/") modparam("snmpstats", "snmpCommunity","public")
####### Routing Logic ########
# main request routing logic
route{
if (!mf_process_maxfwd_header("10")) { sl_send_reply("483","Too Many Hops"); exit; } ##nat route(2); if (has_totag()) { # sequential request withing a dialog should # take the path determined by record-routing if (loose_route()) { if (is_method("BYE")) { setflag(1); # do accouting ... setflag(3); # ... even if the transaction fails } route(1); } else { /* uncomment the following lines if you want to enable
presence */ ##if (is_method("SUBSCRIBE") && $rd == "your.server.ip.address") { ## # in-dialog subscribe requests ## route(2); ## exit; ##} if ( is_method("ACK") ) { if ( t_check_trans() ) { # non loose-route, but stateful ACK; must be an ACK after a 487 or e.g. 404 from upstream server #t_relay(); #exit; route(1); } else { # ACK without matching transaction ... ignore and discard.\n"); exit; } } sl_send_reply("404","Not here"); } exit; }
#initial requests setflag(4); #for dialog statistics # CANCEL processing if (is_method("CANCEL")) { if (t_check_trans()) route(1); # t_relay(); # exit; } #t_check_trans(); if (is_method("PUBLISH|SUBSCRIBE|REFER|OPTIONS|MESSAGE")) { sl_send_reply("405", "Method not allowed"); exit; } # authenticate if from local subscriber (uncomment to enable auth) if (!(method=="REGISTER") && (!from_gw())) { if (!proxy_authorize("", "subscriber")) { proxy_challenge("", "0"); exit; } if (!check_from()) { sl_send_reply("403","Forbidden auth ID"); exit; }else if (avp_check("$avp(s:blocked)", "eq/0")) { sl_send_reply("603","Subscriber disabled"); exit; }else if (avp_check("$avp(s:blocked)", "eq/1")) { sl_send_reply("603","Subscriber with outgoing
blocked"); exit; }
consume_credentials(); # caller authenticated } # record routing if (!is_method("REGISTER|MESSAGE")) record_route(); # account only INVITEs if (is_method("INVITE")) { setflag(1); # do accouting } if (is_method("REGISTER")) { # authenticate the REGISTER requests (uncomment to enable
auth) if (!proxy_authorize("", "subscriber")) { proxy_challenge("", "0"); exit; }
if (!check_to()) { sl_send_reply("403","Forbidden auth ID"); exit; }else if (avp_check("$avp(s:blocked)", "eq/0")) { sl_send_reply("403","Subscriber disabled"); exit; } if (!save("location")) sl_reply_error(); exit; } if ($rU==NULL) { # request with no Username in RURI sl_send_reply("484","Address Incomplete"); exit; } # apply DB based aliases (uncomment to enable) ##alias_db_lookup("dbaliases"); #if the call came from a known gateway it is not authenticated and
we cannot use the function check_from() if (from_gw()) { route(4); }else if (!check_from()) {#if the check_from() returns false the call is not from a subscriber route(4); } else {#it is a subscriber, route using flip domain xlog("L_INFO", "routing using carrierroute $rm to $ru\n"); if (!cr_user_rewrite_uri("$fu", "flip")) { t_newtran(); t_reply("404", "No Route"); exit; } #replaces from by it's default DID uac_replace_from("sip:$avp(s:rpid)@$fd"); }
# when routing via usrloc, log the missed calls also setflag(2); route(1);
}
route[1] { xlog("L_INFO", "ROUTE_1 $rm to $ru\n"); if (subst_uri('/(sip:.*);nat=yes/\1/')) { setflag(6); };
if (isflagset(5)||isflagset(6)) { route(3); } if (!t_relay()) { sl_reply_error(); }; exit;
}
route[2] { xlog("L_INFO", "ROUTE_2 $rm to $ru\n"); if (method=="REGISTER") { fix_nated_register(); } else if (!from_gw()){ fix_nated_contact(); }; setflag(5); } route[3] { xlog("L_INFO", "ROUTE_3 $rm to $ru\n"); if (is_method("BYE|CANCEL")) { unforce_rtp_proxy(); } else if (is_method("INVITE")) { xlog("L_INFO", "FORCE RTP w/ parameter.\n"); force_rtp_proxy("r"); t_on_failure("1"); }; if (isflagset(5)) search_append('Contact:.*sip:[^>[:cntrl:]]*', ';nat=yes'); t_on_reply("1"); }
route[4] {
xlog("L_INFO", "uri does exist $rm to $ru \n"); if (alias_db_lookup("dbaliases")){ if (!lookup("location")) { switch ($retcode) { case -1: t_newtran(); t_reply("404", "Subscriber not
online"); exit; case -2: sl_send_reply("405", "Method Not Allowed"); exit; } } }else{#check if did is blocked $rU = "(BLK)" + $rU; if (alias_db_lookup("dbaliases")){ sl_send_reply("403", "DID blocked"); exit;
}else{# if it is not a valid DID nor a blocked DID tries to
route it using peering domain if (!cr_rewrite_uri("peering", "call_id")) { t_newtran(); t_reply("404", "Peering Not Found"); exit; }
} }
}
failure_route[1] { xlog("L_INFO", "FAILURE $rm to $ru\n"); if (isflagset(6)||isflagset(5)) { unforce_rtp_proxy(); } }
onreply_route[1] { xlog("L_INFO", "ONREPLY_1 - Status $rs from $si $rm .\n"); if (is_method("INVITE")) { if ((isflagset(5)||isflagset(6)) && status=~"(183)|(2[0-9][0-9])") { force_rtp_proxy(); } search_append('Contact:.*sip:[^>[:cntrl:]]*', ';nat=yes');
if (!from_gw()){ #if (isflagset(6)) { xlog("L_INFO", "ONREPLY_1 - ! from gw.\n"); fix_nated_contact(); } exit; }
}
Regards, takeshi
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
Hi, Yes, we are using PRTG to get call and registration stats using SNMP. Thanks for the tip anyway as we haven't set SNMP monitoring yet in all of our servers.
On Tue, Sep 23, 2008 at 11:44 PM, Jeffrey Magder jmagder@somanetworks.com wrote:
I noticed that you have loaded the snmpstats module in your configuration file. Are you polling SNMPStats with SNMP at all? If not, it is possible that its interprocess buffer is filling up. The solution in this case would be to either periodically poll the module, or disable the module if you aren't using it.
On Tue, 2008-09-23 at 16:31 +0900, mayamatakeshi wrote:
Hello, we have openser 1.3.3 running in production (current rev.: 4943). For 3 times in 50 days we had to restart openser to correct pkg memory problem. After some time logging messages like this: /openser.log:Aug 19 10:39:18 ipx022 /usr/local/sbin/openser[16991]: ERROR:core:new_credentials: no pkg memory left, openser will eventually run out of pkg memory and refuse all subsequent requests.
We are trying to recreate this in our lab so that we can follow memory troubleshooting instructions at http://kamailio.net/dokuwiki/doku.php/troubleshooting:memory, but so far we were unable to do it even when generating millions of calls and registration transactions (we are using SIPp to generate normal call flows and even abnormal call flows detected when reading openser.log, like 'invalid cseq for aor', malformed SIP messages etc). And this is much more than in our production environment, with just 600 subscribers and about 2000 calls a day.
The frequency the problem happens is increasing with the number of subscribers, so we are performing periodic restart of openser (actually, what we do is to switch over to the standby server). We already recompiled openser with pkg memory pool size set to 4MB so that this will not have to be done frequently.
Since we cannot recreate this in our lab, we suspect there is a situation happening in production that might not be having been properly handled by openser.cfg. So my question is: would it be possible to an overlooked detail in openser.cfg to cause pkg memory problem?
In case someone could take a look at it, here's our cfg file:
####### Global Parameters #########
debug=0 log_stderror=no log_facility=LOG_LOCAL0
fork=yes children=4
/* uncomment the following lines to enable debugging */ #debug=6 #fork=no #log_stderror=yes
/* uncomment the next line to disable TCP (default on) */ disable_tcp=yes
/* uncomment the next line to enable the auto temporary blacklisting of not available destinations (default disabled) */ #disable_dns_blacklist=no
/* uncomment the next line to enable IPv6 lookup after IPv4 dns lookup failures (default disabled) */ #dns_try_ipv6=yes
/* uncomment the next line to disable the auto discovery of local aliases based on revers DNS on IPs (default on) */ #auto_aliases=no
/* uncomment the following lines to enable TLS support (default off) */ #disable_tls = no #listen = tls:your_IP:5061 #tls_verify_server = 1 #tls_verify_client = 1 #tls_require_client_certificate = 0 #tls_method = TLSv1 #tls_certificate = "/usr/local/etc/openser/tls/user/user-cert.pem" #tls_private_key = "/usr/local/etc/openser/tls/user/user-privkey.pem" #tls_ca_list = "/usr/local/etc/openser/tls/user/user-calist.pem"
port=5060
/* uncomment and configure the following line if you want openser to bind on a specific interface/port/proto (default bind on all available) */ #listen=udp:202.173.5.181:5060
####### Modules Section ########

#set module path
mpath="/usr/local/lib/openser/modules/"

/* uncomment next line for MySQL DB support */
loadmodule "mysql.so"
loadmodule "sl.so"
loadmodule "tm.so"
loadmodule "rr.so"
loadmodule "maxfwd.so"
loadmodule "usrloc.so"
loadmodule "registrar.so"
loadmodule "textops.so"
loadmodule "mi_fifo.so"
loadmodule "uri_db.so"
loadmodule "uri.so"
loadmodule "xlog.so"
loadmodule "acc.so"
loadmodule "carrierroute.so"
loadmodule "nathelper.so"
loadmodule "dialog.so"
loadmodule "snmpstats.so"

/* uncomment next lines for MySQL based authentication support
   NOTE: a DB (like mysql) module must be also loaded */
loadmodule "auth.so"
loadmodule "auth_db.so"
loadmodule "lcr.so"

/* uncomment next line for aliases support
   NOTE: a DB (like mysql) module must be also loaded */
loadmodule "alias_db.so"

/* uncomment next line for multi-domain support
   NOTE: a DB (like mysql) module must be also loaded
   NOTE: be sure and enable multi-domain support in all used modules
         (see "multi-module params" section ) */
loadmodule "domain.so"

/* uncomment the next two lines for presence server support
   NOTE: a DB (like mysql) module must be also loaded */
#loadmodule "presence.so"
#loadmodule "presence_xml.so"

loadmodule "uac.so"
loadmodule "avpops.so"

# ----------------- setting module-specific parameters ---------------
# ----- mi_fifo params -----
modparam("mi_fifo", "fifo_name", "/tmp/openser_fifo")

# ----- rr params -----
# add value to ;lr param to cope with most of the UAs
modparam("rr", "enable_full_lr", 1)
# append the From tag to the Record-Route header; this script calls
# uac_replace_from(), which relies on it to restore From in sequential requests
modparam("rr", "append_fromtag", 1)

# ----- registrar params -----
modparam("registrar", "method_filtering", 1)
/* uncomment the next line to disable parallel forking via location */
# modparam("registrar", "append_branches", 0)
/* uncomment the next line not to allow more than 10 contacts per AOR */
modparam("registrar", "max_contacts", 10)
modparam("registrar", "min_expires", 30)
modparam("registrar", "max_expires", 40)
modparam("registrar", "default_expires", 35)

# ----- uri_db params -----
/* by default we disable the DB support in the module as we do not need it
   in this configuration */
modparam("uri_db", "use_uri_table", 0)
modparam("uri_db", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("uri_db", "use_domain", 1)

# ----- acc params -----
/* what special events should be accounted ? */
modparam("acc", "early_media", 1)
modparam("acc", "report_ack", 1)
modparam("acc", "report_cancels", 1)
/* by default we do not adjust the direction of the sequential requests.
   if you enable this parameter, be sure to enable "append_fromtag"
   in "rr" module */
modparam("acc", "detect_direction", 0)
/* account triggers (flags) */
modparam("acc", "failed_transaction_flag", 3)
modparam("acc", "log_flag", 1)
modparam("acc", "log_missed_flag", 2)
/* uncomment the following lines to enable DB accounting also */
modparam("acc", "db_flag", 1)
modparam("acc", "db_missed_flag", 2)

# ----- usrloc params -----
#modparam("usrloc", "db_mode", 0)
/* uncomment the following lines if you want to enable DB persistency
   for location entries */
modparam("usrloc", "db_mode", 2)
modparam("usrloc", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("usrloc", "use_domain", 1)

# ----- auth_db params -----
/* uncomment the following lines if you want to enable the DB based
   authentication */
modparam("auth_db", "calculate_ha1", yes)
modparam("auth_db", "password_column", "password")
modparam("auth_db", "db_url", "mysql://openser:openserrw@localhost/openser")
# the routing script reads $avp(s:rpid) and $avp(s:blocked) after authentication
modparam("auth_db", "load_credentials", "$avp(s:rpid)=rpid; $avp(s:blocked)=subscriber_status")

# ----- alias_db params -----
/* uncomment the following lines if you want to enable the DB based
   aliases */
modparam("alias_db", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("alias_db", "use_domain", 0)

# ----- domain params -----
/* uncomment the following lines to enable multi-domain detection
   support */
modparam("domain", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("domain", "db_mode", 1) # Use caching

# ----- multi-module params -----
/* uncomment the following line if you want to enable multi-domain support
   in the modules (default off) */
#modparam("alias_db|auth_db|usrloc|uri_db", "use_domain", 1)

# ----- presence params -----
/* uncomment the following lines if you want to enable presence */
#modparam("presence|presence_xml", "db_url",
#	"mysql://openser:openserrw@localhost/openser")
#modparam("presence_xml", "force_active", 1)
#modparam("presence", "server_address", "sip:192.168.1.2:5060")

# ----- carrierroute params -----
modparam("carrierroute", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("carrierroute", "config_source", "db")
modparam("carrierroute", "use_domain", 1)

# ----- NatHelper -----
# from version 1.2 onwards this parameter is required to avoid an error
# when using the function "fix_nated_register();"
modparam("nathelper|registrar", "received_avp", "$avp(i:42)")
modparam("nathelper", "rtpproxy_sock", "udp:127.0.0.1:22222")

# ----- LCR -----
modparam("lcr", "db_url", "mysql://openser:openserrw@localhost/openser")
modparam("lcr|tm", "fr_inv_timer_avp", "$avp(i:704)")
modparam("lcr", "gw_uri_avp", "$avp(i:709)")
modparam("^auth$|lcr", "rpid_avp", "$avp(i:302)")
modparam("lcr", "contact_avp", "$avp(i:711)")
modparam("lcr", "ruri_user_avp", "$avp(i:500)")
modparam("lcr", "dm_flag", 25)

# ----- Dialog ----
# flag 4 is set by the main route on initial requests
modparam("dialog", "dlg_flag", 4)

# ----- SnmpStat -----
modparam("snmpstats", "sipEntityType", "registrarServer")
modparam("snmpstats", "sipEntityType", "proxyServer")
modparam("snmpstats", "MsgQueueMinorThreshold", 2000)
modparam("snmpstats", "MsgQueueMajorThreshold", 5000)
modparam("snmpstats", "dlg_minor_threshold", 500)
modparam("snmpstats", "dlg_major_threshold", 750)
modparam("snmpstats", "snmpgetPath","/usr/bin/")
modparam("snmpstats", "snmpCommunity","public")
####### Routing Logic ########
# main request routing logic
# Main request route: hop-limit check, NAT fix-up, in-dialog handling,
# authentication of non-gateway requests, REGISTER processing, and selection
# of the destination (route[4] or carrierroute) before relaying via route[1].
route{
	if (!mf_process_maxfwd_header("10")) {
		sl_send_reply("483","Too Many Hops");
		exit;
	}

	##nat
	# NOTE(review): reconstructed as a live call ("##nat" read as a label on
	# its own line); route[2] is the only place flag 5 is set -- confirm
	# against the deployed file.
	route(2);

	if (has_totag()) {
		# sequential request withing a dialog should
		# take the path determined by record-routing
		if (loose_route()) {
			if (is_method("BYE")) {
				setflag(1); # do accouting ...
				setflag(3); # ... even if the transaction fails
			}
			route(1);
		} else {
			/* uncomment the following lines if you want to enable
			   presence */
			##if (is_method("SUBSCRIBE") && $rd == "your.server.ip.address") {
			##	# in-dialog subscribe requests
			##	route(2);
			##	exit;
			##}
			if ( is_method("ACK") ) {
				if ( t_check_trans() ) {
					# non loose-route, but stateful ACK; must be an ACK
					# after a 487 or e.g. 404 from upstream server
					#t_relay();
					#exit;
					route(1);
				} else {
					# ACK without matching transaction ... ignore and discard.
					exit;
				}
			}
			sl_send_reply("404","Not here");
		}
		exit;
	}

	#initial requests
	setflag(4); #for dialog statistics

	# CANCEL processing
	if (is_method("CANCEL")) {
		if (t_check_trans())
			route(1);
		# t_relay();
		# exit;
	}

	#t_check_trans();

	if (is_method("PUBLISH|SUBSCRIBE|REFER|OPTIONS|MESSAGE")) {
		sl_send_reply("405", "Method not allowed");
		exit;
	}

	# authenticate if from local subscriber (uncomment to enable auth)
	if (!(method=="REGISTER") && (!from_gw())) {
		if (!proxy_authorize("", "subscriber")) {
			proxy_challenge("", "0");
			exit;
		}
		if (!check_from()) {
			sl_send_reply("403","Forbidden auth ID");
			exit;
		}else if (avp_check("$avp(s:blocked)", "eq/0")) {
			sl_send_reply("603","Subscriber disabled");
			exit;
		}else if (avp_check("$avp(s:blocked)", "eq/1")) {
			sl_send_reply("603","Subscriber with outgoing blocked");
			exit;
		}

		consume_credentials();
		# caller authenticated
	}

	# record routing
	if (!is_method("REGISTER|MESSAGE"))
		record_route();

	# account only INVITEs
	if (is_method("INVITE")) {
		setflag(1); # do accouting
	}

	if (is_method("REGISTER")) {
		# authenticate the REGISTER requests (uncomment to enable auth)
		if (!proxy_authorize("", "subscriber")) {
			proxy_challenge("", "0");
			exit;
		}

		if (!check_to()) {
			sl_send_reply("403","Forbidden auth ID");
			exit;
		}else if (avp_check("$avp(s:blocked)", "eq/0")) {
			sl_send_reply("403","Subscriber disabled");
			exit;
		}
		if (!save("location"))
			sl_reply_error();
		exit;
	}

	if ($rU==NULL) {
		# request with no Username in RURI
		sl_send_reply("484","Address Incomplete");
		exit;
	}

	# apply DB based aliases (uncomment to enable)
	##alias_db_lookup("dbaliases");

	#if the call came from a known gateway it is not authenticated and
	#we cannot use the function check_from()
	if (from_gw()) {
		route(4);
	}else if (!check_from()) {#if the check_from() returns false the call is not from a subscriber
		route(4);
	} else {#it is a subscriber, route using flip domain
		xlog("L_INFO", "routing using carrierroute $rm to $ru\n");
		if (!cr_user_rewrite_uri("$fu", "flip")) {
			t_newtran();
			t_reply("404", "No Route");
			exit;
		}
		#replaces from by it's default DID
		uac_replace_from("sip:$avp(s:rpid)@$fd");
	}

	# when routing via usrloc, log the missed calls also
	setflag(2);
	route(1);
}
# Relay route: optional RTP-proxy handling via route[3], then stateful relay.
# Always terminates script processing.
route[1] {
	xlog("L_INFO", "ROUTE_1 $rm to $ru\n");

	# strip a ";nat=yes" marker from the request URI and remember it in flag 6
	if (subst_uri('/(sip:.*);nat=yes/\1/')) {
		setflag(6);
	}

	# flag 5 or flag 6 set: run the RTP proxy handling first
	if (isflagset(5)||isflagset(6)) {
		route(3);
	}

	# relay; report the failure to the sender if relaying is not possible
	if (!t_relay()) {
		sl_reply_error();
	}

	exit;
}
# NAT fix-up: rewrite the contact information of the request and set flag 5
# for every request passing through here.
route[2] {
	xlog("L_INFO", "ROUTE_2 $rm to $ru\n");

	if (method=="REGISTER") {
		fix_nated_register();
	} else if (!from_gw()){
		# requests from gateways keep their Contact untouched
		fix_nated_contact();
	}

	setflag(5);
}

# RTP proxy handling: release the session on BYE/CANCEL, engage it on INVITE,
# and arm onreply_route[1] for the transaction.
route[3] {
	xlog("L_INFO", "ROUTE_3 $rm to $ru\n");

	if (is_method("BYE|CANCEL")) {
		unforce_rtp_proxy();
	} else if (is_method("INVITE")) {
		xlog("L_INFO", "FORCE RTP w/ parameter.\n");
		force_rtp_proxy("r");
		# failure_route[1] handles a failed INVITE
		t_on_failure("1");
	}

	# mark the Contact with ";nat=yes" when flag 5 is set
	if (isflagset(5)) {
		search_append('Contact:.*sip:[^>[:cntrl:]]*', ';nat=yes');
	}

	t_on_reply("1");
}
# Destination lookup for calls not routed via the "flip" carrierroute domain:
# resolve the dialed number through the alias table and usrloc; otherwise
# reject blocked DIDs or fall back to the "peering" carrierroute domain.
route[4] {
	xlog("L_INFO", "uri does exist $rm to $ru \n");
	if (alias_db_lookup("dbaliases")){
		if (!lookup("location")) {
			switch ($retcode) {
				case -1:
					t_newtran();
					t_reply("404", "Subscriber not online");
					exit;
				case -2:
					sl_send_reply("405", "Method Not Allowed");
					exit;
			}
		}
	}else{#check if did is blocked
		$rU = "(BLK)" + $rU;
		if (alias_db_lookup("dbaliases")){
			sl_send_reply("403", "DID blocked");
			exit;
		}else{# if it is not a valid DID nor a blocked DID tries to route it using peering domain
			# NOTE(review): $rU still carries the "(BLK)" prefix added above
			# when cr_rewrite_uri() runs -- confirm the peering routes
			# account for it.
			if (!cr_rewrite_uri("peering", "call_id")) {
				t_newtran();
				t_reply("404", "Peering Not Found");
				exit;
			}
		}
	}
}
# Failure handler armed by route[3] for INVITEs: log the failure and release
# the RTP proxy session when flag 5 or flag 6 is set.
failure_route[1] {
	xlog("L_INFO", "FAILURE $rm to $ru\n");

	if (isflagset(6)||isflagset(5)) {
		unforce_rtp_proxy();
	}
}
# Reply handler armed by route[3]: for INVITE replies, engage the RTP proxy on
# 183/2xx when flag 5 or flag 6 is set, mark the Contact with ";nat=yes", and
# fix the Contact of replies that do not come from a gateway.
onreply_route[1] {
	xlog("L_INFO", "ONREPLY_1 - Status $rs from $si $rm .\n");
	if (is_method("INVITE")) {
		if ((isflagset(5)||isflagset(6)) && status=~"(183)|(2[0-9][0-9])") {
			force_rtp_proxy();
		}
		search_append('Contact:.*sip:[^>[:cntrl:]]*', ';nat=yes');

		if (!from_gw()){
		#if (isflagset(6)) {
			xlog("L_INFO", "ONREPLY_1 - ! from gw.\n");
			fix_nated_contact();
		}
		exit;
	}
}
Regards, takeshi
Users mailing list Users@lists.kamailio.org http://lists.kamailio.org/cgi-bin/mailman/listinfo/users