Back to home page

Enduro/X

 
 

    


0001 /**
0002  * @brief Respawning dead server processes.
0003  *
0004  * @file respawn.c
0005  */
0006 /* -----------------------------------------------------------------------------
0007  * Enduro/X Middleware Platform for Distributed Transaction Processing
0008  * Copyright (C) 2009-2016, ATR Baltic, Ltd. All Rights Reserved.
0009  * Copyright (C) 2017-2023, Mavimax, Ltd. All Rights Reserved.
0010  * This software is released under one of the following licenses:
0011  * AGPL (with Java and Go exceptions) or Mavimax's license for commercial use.
0012  * See LICENSE file for full text.
0013  * -----------------------------------------------------------------------------
0014  * AGPL license:
0015  *
0016  * This program is free software; you can redistribute it and/or modify it under
0017  * the terms of the GNU Affero General Public License, version 3 as published
0018  * by the Free Software Foundation;
0019  *
0020  * This program is distributed in the hope that it will be useful, but WITHOUT ANY
0021  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
0022  * PARTICULAR PURPOSE. See the GNU Affero General Public License, version 3
0023  * for more details.
0024  *
0025  * You should have received a copy of the GNU Affero General Public License along 
0026  * with this program; if not, write to the Free Software Foundation, Inc.,
0027  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0028  *
0029  * -----------------------------------------------------------------------------
0030  * A commercial use license is available from Mavimax, Ltd
0031  * contact@mavimax.com
0032  * -----------------------------------------------------------------------------
0033  */
0034 #include <string.h>
0035 #include <stdio.h>
0036 #include <stdlib.h>
0037 #include <errno.h>
0038 #include <memory.h>
0039 #include <sys/types.h>
0040 #include <dirent.h>
0041 #include <sys/stat.h>
0042 
0043 #include <utlist.h>
0044 
0045 #include <ndrstandard.h>
0046 #include <ndrxd.h>
0047 #include <atmi_int.h>
0048 #include <nstopwatch.h>
0049 
0050 #include <ndebug.h>
0051 #include <cmd_processor.h>
0052 #include <signal.h>
0053 
0054 #include "userlog.h"
0055 #include <lcfint.h>
0056 #include <singlegrp.h>
0057 
0058 /*---------------------------Externs------------------------------------*/
0059 /*---------------------------Macros-------------------------------------*/
0060 /*---------------------------Enums--------------------------------------*/
0061 /*---------------------------Typedefs-----------------------------------*/
0062 /*---------------------------Globals------------------------------------*/
0063 /*---------------------------Statics------------------------------------*/
0064 /*---------------------------Prototypes---------------------------------*/
0065 
0066 /**
0067  * This will check the list of dead processes and will try to start them up
0068  * if the wait time have been reached.
0069  * 
0070  * Forbid running function in restart mode, respawns with wait, can cause
0071  * that, and that can break the logic of sequential order startups (required
0072  * for singleton group boots at failover).
0073  * 
0074  * @return 
0075  */
0076 expublic int do_respawn_check(void)
0077  {
0078     int ret=EXSUCCEED;
0079     pm_node_t *p_pm;
0080     long delta;
0081     int abort = EXFALSE;
0082     int nrgrps = ndrx_G_libnstd_cfg.pgmax;
0083     int sg_groups[nrgrps];
0084     static int into_respawn = EXFALSE;
0085     int we_run = EXFALSE;
0086     int singleton_attempt;
0087 
0088     /* No sanity checks while app config not loaded */
0089     if (NULL==G_app_config)
0090     {
0091         goto out;
0092     }
0093 
0094     if (into_respawn)
0095     {
0096         NDRX_LOG(log_debug, "do_respawn_check: recursive call, bypass the run!");
0097         goto out;
0098     }
0099 
0100     we_run=EXTRUE;
0101     into_respawn=EXTRUE;
0102 
0103     NDRX_LOG(6, "Time for respawning checking...");
0104 
0105     /* use snapshoo checks here... */
0106     ndrx_sg_get_lock_snapshoot(sg_groups, &nrgrps, 0);
0107 
0108     DL_FOREACH(G_process_model, p_pm)
0109     {
0110         NDRX_LOG(6, "Proc: %s, Reqstate %d, curstate %d",
0111             p_pm->binary_name, p_pm->reqstate, p_pm->state);
0112 
0113         if ((NDRXD_PM_RUNNING_OK==p_pm->reqstate || NDRXD_PM_RESTART==p_pm->reqstate) 
0114                 && PM_NOT_RUNNING(p_pm->state))
0115         {
0116             if (NDRXD_PM_RESTART==p_pm->reqstate)
0117             {
0118                 NDRX_LOG(log_warn, "Proc: %s (Srvid: %d), Reqstate %d, curstate %d "
0119                     "starting as restart was requested",
0120                     p_pm->binary_name, p_pm->srvid, p_pm->reqstate, p_pm->state);
0121                 p_pm->reqstate=NDRXD_PM_RUNNING_OK;
0122             }
0123             else if (!p_pm->conf->respawn)
0124             {
0125                 NDRX_LOG(6, "respawn param is off -> continue with next...");
0126                 continue;
0127             }
0128 
0129             if ( NDRXD_PM_WAIT==p_pm->state 
0130                 && ndrx_ndrxconf_procgroups_is_singleton(G_app_config->procgroups, p_pm->conf->procgrp_no)
0131                 && p_pm->conf->procgrp_no > 0)
0132             {
0133                 singleton_attempt=EXTRUE;
0134             }
0135             else
0136             {
0137                 singleton_attempt=EXFALSE;
0138             }
0139 
0140             /*
0141             delta = p_pm->rspstwatch;
0142             */
0143             delta = p_pm->state_changed;
0144 
0145             NDRX_LOG(6, "Respawn delta: %ld singleton_attempt: %d", delta, singleton_attempt);
0146 
0147             /* Check is it time for startup? Note that exec_seq_try is incremented prior respawn
0148              * check. Thus the first cycle of sanity is without try increment.
0149              */
0150             if ( (p_pm->exec_seq_try<=1 && delta >= G_app_config->restart_min)
0151             || (p_pm->exec_seq_try>1 
0152                            && delta >= (G_app_config->restart_min+(p_pm->exec_seq_try-1)*G_app_config->restart_step))
0153                     || (delta >= G_app_config->restart_max)
0154                     || singleton_attempt)
0155             {
0156                 long processes_started=0;
0157                 long schedule_next;
0158                 int do_wait = EXFALSE;
0159 
0160                 /* 
0161                  * If process is in group and previous state was "wait", then we shall wait for process response
0162                  * (if it is bootable, checked by start_process), to keep the order of booting.
0163                  */
0164                 if ( singleton_attempt
0165                     && !(NDRX_SG_NO_ORDER & sg_groups[p_pm->conf->procgrp_no-1]) )
0166                 {
0167                     /* Ordering required, lets wait */
0168                     do_wait = EXTRUE;
0169                 }
0170 
0171                 NDRX_LOG(log_warn, "Respawning server: srvid: %d,"
0172                         " name: [%s], seq try: %d, already not running: %d secs, singleton_attempt: %d, do_wait: %d",
0173                         p_pm->srvid, p_pm->binary_name, p_pm->exec_seq_try, delta, singleton_attempt, do_wait);
0174 
0175                 /* if not doing singleton_attempt, then it is respawn... */
0176                 start_process(NULL, p_pm, NULL, &processes_started, do_wait, &abort, sg_groups, !singleton_attempt);
0177 
0178                 /***Just for info***/
0179                 schedule_next = G_app_config->restart_min+p_pm->exec_seq_try*G_app_config->restart_step;
0180                 if (schedule_next>G_app_config->restart_max)
0181                     schedule_next = G_app_config->restart_max;
0182 
0183                 NDRX_LOG(log_warn, "next try after: %d sty",
0184                         schedule_next);
0185             }
0186         }
0187 
0188     }/*DL_FOREACH*/
0189 
0190     /* set the boot flag */
0191     ndrx_mark_singlegrp_srv_booted(nrgrps, sg_groups);
0192 
0193 out:
0194 
0195     /* restore state as runnable.. */
0196     if (we_run)
0197     {
0198         into_respawn=EXFALSE;
0199     }
0200 
0201     return ret;
0202 }
0203 
0204 
0205 /* vim: set ts=4 sw=4 et smartindent: */