samba-cvs.cvs
[Top] [All Lists]

[SCM] CTDB repository - branch master updated - 60f3c04bd8b20ecbe937ffed

Subject: [SCM] CTDB repository - branch master updated - 60f3c04bd8b20ecbe937ffed08875cdc6898b422
From: Ronnie Sahlberg
Date: Mon, 7 Jul 2008 05:43:28 -0500 CDT
The branch, master has been updated
       via  60f3c04bd8b20ecbe937ffed08875cdc6898b422 (commit)
      from  6043f926f89b361c7fe14fc60d2769fd2ba63dfc (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 60f3c04bd8b20ecbe937ffed08875cdc6898b422
Author: Ronnie Sahlberg <ronniesahlberg@xxxxxxxxx>
Date:   Mon Jul 7 20:38:59 2008 +1000

    Use more liberal handling of event scripts timing out.
    
    If the event script that timed out was for the "monitor" event, then
    even if it timed out we still return SUCCESS back to the caller that
    invoked the eventscript.
    Only consider the eventscript for "monitor" to have failed with an error
    if and only if it actually terminated with an error, or if it timed out
    5 times in a row and hung.

-----------------------------------------------------------------------

Summary of changes:
 server/ctdb_tunables.c |    2 +-
 server/eventscript.c   |   32 +++++++++++++++++++++++++-------
 2 files changed, 26 insertions(+), 8 deletions(-)


Changeset truncated at 500 lines:

diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index 9b9c79c..d138137 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -38,7 +38,7 @@ static const struct {
        { "MonitorInterval",     15,  offsetof(struct ctdb_tunable, 
monitor_interval) },
        { "TickleUpdateInterval",20,  offsetof(struct ctdb_tunable, 
tickle_update_interval) },
        { "EventScriptTimeout",  20,  offsetof(struct ctdb_tunable, 
script_timeout) },
-       { "EventScriptBanCount",  3,  offsetof(struct ctdb_tunable, 
script_ban_count) },
+       { "EventScriptBanCount",  5,  offsetof(struct ctdb_tunable, 
script_ban_count) },
        { "RecoveryGracePeriod", 60,  offsetof(struct ctdb_tunable, 
recovery_grace_period) },
        { "RecoveryBanPeriod",  300,  offsetof(struct ctdb_tunable, 
recovery_ban_period) },
        { "DatabaseHashSize", 10000,  offsetof(struct ctdb_tunable, 
database_hash_size) },
diff --git a/server/eventscript.c b/server/eventscript.c
index 0e4af03..54d914b 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -257,15 +257,33 @@ static void ctdb_event_script_timeout(struct 
event_context *ev, struct timed_eve
 
        DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", 
state->options, ctdb->event_script_timeouts));
 
-       talloc_free(state);
-       callback(ctdb, -1, private_data);
-
-       ctdb->event_script_timeouts++;
-       if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
-               ctdb->event_script_timeouts = 0;
-               DEBUG(DEBUG_ERR, ("Maximum timeout count reached for 
eventscript. Banning self for %d seconds\n", 
ctdb->tunable.recovery_ban_period));
+       if (!strcmp(state->options, "monitor")) {
+               /* if it is a monitor event, we allow it to "hang" a few times
+                  before we declare it a failure and ban ourself (and make
+                  ourself unhealthy)
+               */
+               DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event 
timedout.\n"));
+
+               ctdb->event_script_timeouts++;
+               if (ctdb->event_script_timeouts > 
ctdb->tunable.script_ban_count) {
+                       ctdb->event_script_timeouts = 0;
+                       DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for 
eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, 
ctdb->tunable.recovery_ban_period));
+                       ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+                       callback(ctdb, -1, private_data);
+               } else {
+                       callback(ctdb, 0, private_data);
+               }
+       } else if (!strcmp(state->options, "startup")) {
+               DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event 
timedout.\n"));
+               callback(ctdb, -1, private_data);
+       } else {
+               /* if it is not a monitor event we ban ourself immediately */
+               DEBUG(DEBUG_ERR, (__location__ " eventscript for 
NON-monitor/NON-startup event timedout. Immediately banning ourself for %d 
seconds\n", ctdb->tunable.recovery_ban_period));
                ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+               callback(ctdb, -1, private_data);
        }
+
+       talloc_free(state);
 }
 
 /*


-- 
CTDB repository

<Prev in Thread] Current Thread [Next in Thread>
  • [SCM] CTDB repository - branch master updated - 60f3c04bd8b20ecbe937ffed08875cdc6898b422, Ronnie Sahlberg <=