Hello,
On 12/09/08 20:41, Aurelien Grimaud wrote:
Daniel-Constantin Mierla a écrit :
On 12/09/08 18:52, Aurelien Grimaud wrote:
I am able to reproduce it with 1 call / second
without my module on
BYE requests.
here are traces.
there is a race (at least), indeed. It happens when there is fast
reply. I am going to send you a patch soon for testing, you use svn
branch 1.4 or the tarball?
Great, I use the kamailio-1.4.2-notls tarball.
But I can test any SVN branch / trunk if you wish.
can you test the attached patch with SVN trunk? Let me know the results.
Pay attention to see if breaks something else, not just if fixes the
reported issue. I let there some debug messages, to help
troubleshooting, if the fix is ok, I'll remove them before committing.
Thanks,
Daniel
Aurelien
Cheers,
Daniel
> ps: I added the ms on Logs.
>
> Aurelien
>
> Daniel-Constantin Mierla a écrit :
>
>> On 12/09/08 17:56, Klaus Darilion wrote:
>>
>>
>>> Daniel-Constantin Mierla schrieb:
>>>
>>>
>>>> Hello,
>>>>
>>>> On 12/09/08 17:31, Klaus Darilion wrote:
>>>>
>>>>
>>>>> Aurelien Grimaud schrieb:
>>>>>
>>>>>
>>>>>
>>>>>> Fair enough.
>>>>>> If no one already experienced this strange behavior, it should
>>>>>> be my module ...
>>>>>> I'll try to make it again without my module.
>>>>>>
>>>>>>
>>>>> See my other email.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>> However, in the log, after the 200 response, there is a
>>>>>> cleanup_uac_timers: RETR/FR timers reset.
>>>>>> So those timers are cleared.
>>>>>>
>>>>>>
>>>>> But the problem is, that the process which handles the INVITE has
>>>>> not finished yet and those (re)SETS the timer.
>>>>>
>>>>> @Daniel - Have you investigated the problem?
>>>>>
>>>>>
>>>> so this is the half of the issue reported via:
>>>>
https://sourceforge.net/tracker/index.php?func=detail&aid=2105813&g…
>>>>
>>>>
>>> yes.
>>>
>>> Can it be related to other modules which register callbacks (e.g.
>>> pua module or Aurelien's module?
>>>
>>>
>> what is the requests/second rate when the issue appears?
>>
>> At first look, between sending and setting retransmission timer,
>> there is no much processing for the request. The callback executed
>> there is in use by siptrace, are you using this module?
>>
>> Cheers,
>> Daniel
>>
>>
>>>> This one got lost, but as I started to fix the other half
>>>> (replying using proper mode to do retransmission), will
>>>> investigate this as well ...
>>>>
>>>> Cheers,
>>>> Daniel
>>>>
_______________________________________________
Users mailing list
Users(a)lists.kamailio.org
http://lists.kamailio.org/cgi-bin/mailman/listinfo/users
--
Daniel-Constantin Mierla
http://www.asipto.com
Index: modules/tm/t_reply.c
===================================================================
--- modules/tm/t_reply.c (revision 5322)
+++ modules/tm/t_reply.c (working copy)
@@ -1030,8 +1030,8 @@
/* reset FR/retransmission timers */
for (i=t->first_branch; i<t->nr_of_outgoings; i++ ) {
- reset_timer( &t->uac[i].request.retr_timer );
- reset_timer( &t->uac[i].request.fr_timer );
+ reset_timer( &t->uac[i].request.retr_timer, TIMER_RB_DEL);
+ reset_timer( &t->uac[i].request.fr_timer, TIMER_RB_DEL);
}
LM_DBG("RETR/FR timers reset\n");
}
@@ -1361,9 +1361,9 @@
/* it's a cancel which is not e2e ? */
if ( get_cseq(p_msg)->method_id==METHOD_CANCEL && is_invite(t) ) {
/* ... then just stop timers */
- reset_timer( &uac->local_cancel.retr_timer);
+ reset_timer( &uac->local_cancel.retr_timer, TIMER_RB_DEL);
if ( msg_status >= 200 ) {
- reset_timer( &uac->local_cancel.fr_timer);
+ reset_timer( &uac->local_cancel.fr_timer, TIMER_RB_DEL);
}
LM_DBG("reply to local CANCEL processed\n");
goto done;
@@ -1371,11 +1371,11 @@
/* *** stop timers *** */
/* stop retransmission */
- reset_timer(&uac->request.retr_timer);
+ reset_timer(&uac->request.retr_timer, TIMER_RB_STOP);
/* stop final response timer only if I got a final response */
if ( msg_status >= 200 ) {
- reset_timer( &uac->request.fr_timer);
+ reset_timer( &uac->request.fr_timer, TIMER_RB_STOP);
}
/* acknowledge negative INVITE replies (do it before detailed
Index: modules/tm/t_funcs.c
===================================================================
--- modules/tm/t_funcs.c (revision 5322)
+++ modules/tm/t_funcs.c (working copy)
@@ -123,8 +123,8 @@
{
set_kr(REQ_RLSD);
- reset_timer( & trans->uas.response.fr_timer );
- reset_timer( & trans->uas.response.retr_timer );
+ reset_timer( & trans->uas.response.fr_timer , TIMER_RB_DEL );
+ reset_timer( & trans->uas.response.retr_timer, TIMER_RB_DEL );
cleanup_uac_timers( trans );
Index: modules/tm/t_fwd.c
===================================================================
--- modules/tm/t_fwd.c (revision 5322)
+++ modules/tm/t_fwd.c (working copy)
@@ -552,8 +552,8 @@
for (i=t_invite->first_branch; i<t_invite->nr_of_outgoings; i++) {
if (t_invite->uac[i].last_received==0){
/* reset the "request" timers */
- reset_timer(&t_invite->uac[i].request.retr_timer);
- reset_timer(&t_invite->uac[i].request.fr_timer);
+ reset_timer(&t_invite->uac[i].request.retr_timer, TIMER_RB_DEL);
+ reset_timer(&t_invite->uac[i].request.fr_timer, TIMER_RB_DEL);
LOCK_REPLIES( t_invite );
if (RPS_ERROR==relay_reply(t_invite,FAKED_REPLY,i,487,&dummy_bm))
lowest_error = -1; /* force sending 500 error */
@@ -718,6 +718,9 @@
-p_msg->REQ_METHOD);
}
+ //LM_DBG("++++++++++++ before sleep\n");
+ //sleep(2);
+ //LM_DBG("++++++++++++ after sleep\n");
start_retr( &t->uac[i].request );
set_kr(REQ_FWDED);
}
Index: modules/tm/timer.c
===================================================================
--- modules/tm/timer.c (revision 5322)
+++ modules/tm/timer.c (working copy)
@@ -302,7 +302,7 @@
LM_DBG("retransmission_handler : request resending"
" (t=%p, %.9s ... )\n", r_buf->my_T, r_buf->buffer.s);
if (SEND_BUFFER( r_buf )==-1) {
- reset_timer( &r_buf->fr_timer );
+ reset_timer( &r_buf->fr_timer, TIMER_RB_DEL );
fake_reply(r_buf->my_T, r_buf->branch, 503 );
return;
}
@@ -346,7 +346,7 @@
}
# endif
- reset_timer( &(r_buf->retr_timer) );
+ reset_timer( &(r_buf->retr_timer) , TIMER_RB_DEL);
/* the transaction is already removed from FR_LIST by the timer */
@@ -384,8 +384,8 @@
{
int i;
for (i=0; i<t->nr_of_outgoings; i++ ) {
- reset_timer( &t->uac[i].local_cancel.retr_timer );
- reset_timer( &t->uac[i].local_cancel.fr_timer );
+ reset_timer( &t->uac[i].local_cancel.retr_timer, TIMER_RB_DEL);
+ reset_timer( &t->uac[i].local_cancel.fr_timer , TIMER_RB_DEL);
}
}
@@ -809,7 +809,7 @@
* (successive set_timer won't work unless you're lucky
* an catch the race condition, the idea here is there is no
* guarantee you can do anything after a timer_reset)*/
-void reset_timer( struct timer_link* tl )
+void reset_timer( struct timer_link* tl , int state)
{
/* disqualify this timer from execution by setting its time_out
to zero; it will stay in timer-list until the timer process
@@ -817,9 +817,10 @@
but not execute; there is a race condition, though -- see
timer.c for more details
*/
- tl->deleted = 1;
+ tl->deleted = state;
+ LM_DBG("+++++++++++ tl=%p, st=%d)\n", tl, state);
#ifdef EXTRA_DEBUG
- LM_DBG("(group %d, tl=%p)\n", tl->tg, tl );
+ LM_DBG("(group %d, tl=%p, st=%d)\n", tl->tg, tl, state);
#endif
}
@@ -848,6 +849,13 @@
return;
}
+ /* rb timer already scheduled for delete */
+ if(new_tl->deleted==TIMER_RB_DEL)
+ {
+ LM_DBG("++++++++++++ too late to arm the timer [%d]\n", list_id);
+ //return;
+ }
+
if (!ext_timeout) {
timeout = timer_id2timeout[ list_id ];
} else {
Index: modules/tm/timer.h
===================================================================
--- modules/tm/timer.h (revision 5322)
+++ modules/tm/timer.h (working copy)
@@ -57,6 +57,9 @@
NR_OF_TIMER_LISTS
};
+#define TIMER_RB_INIT 0
+#define TIMER_RB_STOP 1
+#define TIMER_RB_DEL 2
/* all you need to put a cell in a timer list
links to neighbors and timer value */
@@ -67,7 +70,7 @@
struct timer_link *ld_tl;
volatile utime_t time_out;
struct timer *timer_list;
- unsigned int deleted;
+ volatile unsigned int deleted;
#ifdef EXTRA_DEBUG
enum timer_groups tg;
#endif
@@ -106,7 +109,7 @@
void init_timer_list( enum lists list_id);
void reset_timer_list( enum lists list_id);
-void reset_timer( struct timer_link* tl );
+void reset_timer( struct timer_link* tl, int state);
/* determine timer length and put on a correct timer list */
void set_timer( struct timer_link *new_tl, enum lists list_id,