Thanks :) ... it's a lot, but not enough ...
could you print in frame 1 the following values:
new_val.s,
req->from->name.s
req->from->len
btw, are there any errors from the process before the crash?
thanks,
Bogdan
Martin Klisch wrote:
Hi,
i use acc and uac. here are the backtrace + frame infos for the core dump,
i dont know much about debugging, but i hope these things are useful:
i bet it's too much info. :)
--------------------snip-backtrace
(gdb) backtrace
#0 0xff2c0888 in memcpy () from
/platform/SUNW,Sun-Fire-V210/lib/libc_psr.so.1
#1 0xfeb86948 in restore_from_reply (t=0xfc9fab20, type=2, p=0x0) at
from.c:574
#2 0xff02bb64 in run_trans_callbacks (type=2, trans=0xfc9eb7e8,
req=0x12c9cc, rpl=0xdfcd8, code=-56996184) at t_hooks.c:205
#3 0xff02ee2c in t_reply_matching (p_msg=0x140ac8, p_branch=0x3) at
t_lookup.c:842
#4 0xff02f550 in t_check (p_msg=0x140ac8, param_branch=0xffbffa7c) at
t_lookup.c:913
#5 0xff0388bc in reply_received (p_msg=0x12d000) at t_reply.c:1269
#6 0x00024dd4 in forward_reply (msg=0x140ac8) at forward.c:488
#7 0x00043b78 in receive_msg (buf=0x12d800 "", len=41462, rcv_info=0x1)
at receive.c:195
#8 0x0006f134 in udp_rcv_loop () at udp_server.c:465
#9 0x0003385c in main_loop () at main.c:834
#10 0x00035be4 in main (argc=3, argv=0xe7800) at main.c:1399
--------------snip frame
(gdb) frame 0
#0 0xff2c0888 in memcpy () from
/platform/SUNW,Sun-Fire-V210/lib/libc_psr.so.1
(gdb) list
579 rpl->from->len,rpl->from->name.s);
580 l = del_lump( rpl, rpl->from->name.s-rpl->buf,
rpl->from->len, 0);
581 if (l==0) {
582 LOG(L_ERR,"ERROR:uac:restore_from_reply: del lump
failed\n");
583 return;
584 }
585
586 DBG("DBG:uac::restore_from_reply: inserting <%.*s>\n",
587 new_val.len,new_val.s);
588 if (insert_new_lump_after( l, new_val.s, new_val.len,
0)==0) {
(gdb)
(gdb) frame 1
#1 0xfeb86948 in restore_from_reply (t=0xfc9fab20, type=2, p=0x0) at
from.c:574
574 memcpy( new_val.s, req->from->name.s, req->from->len);
(gdb) list
569 new_val.s = pkg_malloc( req->from->len );
570 if (p==0) {
571 LOG(L_ERR,"ERROR:uac:restore_from_reply: no more
pkg mem\n");
572 return;
573 }
574 memcpy( new_val.s, req->from->name.s, req->from->len);
575 new_val.len = req->from->len;
576
577
578 DBG("DBG:uac::restore_from_reply: removing <%.*s>\n",
(gdb)
(gdb) frame 2
#2 0xff02bb64 in run_trans_callbacks (type=2, trans=0xfc9eb7e8,
req=0x12c9cc, rpl=0xdfcd8, code=-56996184) at t_hooks.c:205
205 cbp->callback( trans, type, &params );
(gdb) list
200 for (cbp=trans->tmcb_hl.first; cbp; cbp=cbp->next) {
201 if ( (cbp->types)&type ) {
202 DBG("DBG: trans=%p, callback type %d, id
%d entered\n",
203 trans, type, cbp->id );
204 params.param = &(cbp->param);
205 cbp->callback( trans, type, &params );
206 }
207 }
208 set_avp_list( backup );
209 params.extra1 = params.extra2 = 0;
(gdb)
#3 0xff02ee2c in t_reply_matching (p_msg=0x140ac8, p_branch=0x3) at
t_lookup.c:842
842 run_trans_callbacks( TMCB_RESPONSE_IN, T,
T->uas.request, p_msg,
(gdb) list
837 if (parse_headers(p_msg, HDR_TO_F, 0)==-1) {
838 LOG(L_ERR, "ERROR:
t_reply_matching: to parsing failed\n");
839 }
840 }
841 if (!is_local(p_cell)) {
842 run_trans_callbacks( TMCB_RESPONSE_IN, T,
T->uas.request, p_msg,
843 p_msg->REPLY_STATUS);
844 }
845 return 1;
846 } /* for cycle */
(gdb)
(gdb) frame 4
#4 0xff02f550 in t_check (p_msg=0x140ac8, param_branch=0xffbffa7c) at
t_lookup.c:913
913 t_reply_matching( p_msg ,
(gdb) list
908 LOG(L_ERR, "ERROR:
INVITE reply cannot be parsed\n");
909 return -1;
910 }
911 }
912
913 t_reply_matching( p_msg ,
914
param_branch!=0?param_branch:&local_branch );
915
916 }
917 #ifdef EXTRA_DEBUG
(gdb) frame 5
#5 0xff0388bc in reply_received (p_msg=0x12d000) at t_reply.c:1269
1269 if (t_check(p_msg, &branch ) == -1) return 1;
(gdb) list
1264 struct ua_client *uac;
1265 struct cell *t;
1266 struct usr_avp **backup_list;
1267
1268 /* make sure we know the associated transaction ... */
1269 if (t_check(p_msg, &branch ) == -1) return 1;
1270
1271 /*... if there is none, tell the core router to fwd
statelessly */
1272 t = get_t();
1273 if ((t == 0) || (t == T_UNDEFINED)) return 1;
(gdb)
(gdb) frame 6
#6 0x00024dd4 in forward_reply (msg=0x140ac8) at forward.c:488
488 if (mod->exports->response_f(msg)==0) goto
skip;
(gdb) list
483 /* quick hack, slower for multiple modules*/
484 for (mod=modules;mod;mod=mod->next){
485 if ((mod->exports) &&
(mod->exports->response_f)){
486 DBG("DEBUG:forward_reply: found module %s,
passing reply to it\n",
487 mod->exports->name);
488 if (mod->exports->response_f(msg)==0) goto
skip;
489 }
490 }
491
492 /* we have to forward the reply stateless, so we need
second via -bogdan*/
(gdb)
(gdb) frame 7
#7 0x00043b78 in receive_msg (buf=0x12d800 "", len=41462, rcv_info=0x1)
at receive.c:195
195 forward_reply(msg);
(gdb) list
190 msg->REPLY_STATUS);
191 update_stat( drp_rpls, 1);
192 goto end; /* drop the message */
193 } else {
194 /* send the msg */
195 forward_reply(msg);
196 /* TODO - TX reply stat */
197 }
198
199 /* execute post reply-script callbacks */
(gdb)
(gdb) frame 8
#8 0x0006f134 in udp_rcv_loop () at udp_server.c:465
465 receive_msg(buf, len, &ri);
(gdb) list
460 continue;
461 }
462
463
464 /* receive_msg must free buf too!*/
465 receive_msg(buf, len, &ri);
466
467 /* skip: do other stuff */
468
469 }
(gdb)
(gdb) frame 9
#9 0x0003385c in main_loop () at main.c:834
834 return udp_rcv_loop();
(gdb) list
829 bind_address=si; /*
shortcut */
830 if (init_child(chd_rank) <
0) {
831 LOG(L_ERR,
"init_child failed\n");
832 goto error;
833 }
834 return udp_rcv_loop();
835 }else{
836
pt[process_no].pid=pid; /*should be in shared mem.*/
837
snprintf(pt[process_no].desc, MAX_PT_DESC,
838 "receiver
child=%d sock= %s:%s", i,
(gdb)
(gdb) frame 10
#10 0x00035be4 in main (argc=3, argv=0xe7800) at main.c:1399
1399 ret=main_loop();
(gdb) list
1394 r);
1395 goto error;
1396 };
1397
1398
1399 ret=main_loop();
1400
1401 error:
1402 /*kill everything*/
1403 kill_all_children(SIGTERM);
(gdb)
Thanks Martin,
it looks like the crash occurs in a function registered as callback to
TM for the TMCB_RESPONSE_IN event.
there are couple of modules doing this - acc, osp, siptrace, uac, but I
cannot figure out which one. Can you get more info from the debugger ?
gdb provides more info - like the corresponding files for the called
functions...
also which modules (from above) are you using?
regards,
bogdan
Martin Klisch wrote:
Hi Bogdan-Andrei,
here is the backtrace:
root@sip-b:/var/core # pstack
core_sip-b_openser_40002_40002_1178955651_10547
core 'core_sip-b_openser_40002_40002_1178955651_10547' of 10547:
/opt/SER/SER0/bin/openser -P /var/run/SER0.pid -f /opt/SER/SER0/etc/SE
ff2c0888 memcpy (fc9fab20, 2, 0, feb868c0, 2, 4a8) + 3c8
ff02bb5c run_trans_callbacks (2, fc9eb7e8, 12c9cc, dfcd8, fc9a4ea8, 1)
+ bc
ff02ee24 t_reply_matching (140ac8, 3, e788c, 3, 0, 1) + 12e4
ff02f548 t_check (140ac8, ffbffa7c, 22c2e0, 3, 0, 2) + 268
ff0388b4 reply_received (140ac8, 3, ff05df54, 0, ff0388a0, ec400) + 14
00024dcc forward_reply (140ac8, 3, e0400, 0, fc804ae8, a1f7) + 10c
00043b70 receive_msg (12d800, a1f6, 1, 0, 0, 13c4) + 6d0
0006f12c udp_rcv_loop (fc890, fc800, e7800, fc800, 10c890, dfc00) + a4c
00033854 main_loop (fc8040e8, 2500000, 0, e7800, 38, dfc00) + 5f4
00035bdc main (3, e7800, ffbffde4, 3, ffffffff, e7800) + 1cbc
000174fc _start (0, 0, 0, 0, 0, 0) + 5c
Hi Martin,
once you spot the error again, please follow the wiki indications and
get the dump for pkg memory.
changing from db_mode 3 to 1 should not affect the consumption of
private memory (the location cache is kept in shared memory).
If you still have the core file, please post the backtrace.
Thanks and regards,
Bogdan
Martin Klisch wrote:
> Hi,
>
> i can not reproduce the errors. it only appears per random. in the
> last
> 5
> days it appeared two times. five days ago i moved the daemon to
> another
> sun fire with less ram and switched from db_mode 3 to 1 - could this
> be
> the reason? i moved it back to the other machine now.
>
> on saturday it coredumped after the memory errors. would the coredump
> file
> help?
>
> bye, martin
>
>
>
>
>> Martin,
>>
>> the errors refer to a potential problem related to private memory
>> (not
>> shared one), so you need to send the signal (as explained on the
>> wiki)
>> to the process printing such errors ( like 20106). Get and post the
>> mem
>> dump for pkg memory.
>>
>> regards,
>> bogdan
>>
>> Klaus Darilion wrote:
>>
>>
>>
>>>
http://openser.org/dokuwiki/doku.php/troubleshooting:memory
>>>
>>> Martin Klisch wrote:
>>>
>>>
>>>
>>>> Hi,
>>>>
>>>> i have some memory errors after running OpenSER one day:
>>>> May 15 02:46:56 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 381148
>>>> local0.error] ERROR: build_req_buf_from_sip_req: out of memory
>>>> May 15 02:46:56 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 933771
>>>> local0.error] ERROR:tm:print_uac_request: no pkg_mem
>>>> May 15 02:46:56 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 826633
>>>> local0.error] ERROR:tm:t_forward_nonack: failure to add branches
>>>> May 15 02:47:00 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 381148
>>>> local0.error] ERROR: build_req_buf_from_sip_req: out of memory
>>>> May 15 02:47:00 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 933771
>>>> local0.error] ERROR:tm:print_uac_request: no pkg_mem
>>>> May 15 02:47:00 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 826633
>>>> local0.error] ERROR:tm:t_forward_nonack: failure to add branches
>>>> May 15 02:47:03 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 381148
>>>> local0.error] ERROR: build_req_buf_from_sip_req: out of memory
>>>> May 15 02:47:03 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 933771
>>>> local0.error] ERROR:tm:print_uac_request: no pkg_mem
>>>>
>>>> Version: OpenSER 1.2.0
>>>> OS: Solaris 10
>>>> Arch: Sparc
>>>>
>>>> Is there a memory leak in any module? or is my memory too low?
>>>>
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> Users mailing list
>>>>
>>>>
http://openser.org/cgi-bin/mailman/listinfo/users
>>>>
>>>>
>>>>
>>> _______________________________________________
>>> Users mailing list
>>> Users(a)openser.org
>>>
http://openser.org/cgi-bin/mailman/listinfo/users
>>>
>>>
>>>
>>>
>
>