Back to home page

Enduro/X

 
 

    


0001 /**
0002  * @brief Singleton group lock provider
0003  *   At normal start it tries to lock immediately (however this gives a slight risk
0004  *   that if node 2 just lost the lock, cpmsrv/ndrxd would not have enough time
0005  *   to kill the processes).
0006  *
0007  * @file exsinglesv.c
0008  */
0009 /* -----------------------------------------------------------------------------
0010  * Enduro/X Middleware Platform for Distributed Transaction Processing
0011  * Copyright (C) 2009-2016, ATR Baltic, Ltd. All Rights Reserved.
0012  * Copyright (C) 2017-2023, Mavimax, Ltd. All Rights Reserved.
0013  * This software is released under one of the following licenses:
0014  * AGPL (with Java and Go exceptions) or Mavimax's license for commercial use.
0015  * See LICENSE file for full text.
0016  * -----------------------------------------------------------------------------
0017  * AGPL license:
0018  *
0019  * This program is free software; you can redistribute it and/or modify it under
0020  * the terms of the GNU Affero General Public License, version 3 as published
0021  * by the Free Software Foundation;
0022  *
0023  * This program is distributed in the hope that it will be useful, but WITHOUT ANY
0024  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
0025  * PARTICULAR PURPOSE. See the GNU Affero General Public License, version 3
0026  * for more details.
0027  *
0028  * You should have received a copy of the GNU Affero General Public License along 
0029  * with this program; if not, write to the Free Software Foundation, Inc.,
0030  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0031  *
0032  * -----------------------------------------------------------------------------
0033  * A commercial use license is available from Mavimax, Ltd
0034  * contact@mavimax.com
0035  * -----------------------------------------------------------------------------
0036  */
0037 #include <stdio.h>
0038 #include <stdlib.h>
0039 #include <string.h>
0040 #include <errno.h>
0041 #include <regex.h>
0042 #include <utlist.h>
0043 #include <unistd.h>
0044 #include <signal.h>
0045 
0046 #include <ndebug.h>
0047 #include <atmi.h>
0048 #include <atmi_int.h>
0049 #include <typed_buf.h>
0050 #include <ndrstandard.h>
0051 #include <ubf.h>
0052 #include <ubfutil.h>
0053 #include <cconfig.h>
0054 #include "exsinglesv.h"
0055 #include <singlegrp.h>
0056 #include <lcfint.h>
0057 #include <exthpool.h>
0058 #include <Exfields.h>
0059 /*---------------------------Externs------------------------------------*/
0060 /*---------------------------Macros-------------------------------------*/
0061 #define PROGSECTION "@exsinglesv" /**< Configuration section looked up by tpsvrinit() */
0062 #define MIN_SGREFRESH_CEOFFICIENT   3 /**< Minimum divider of sgrefresh used to derive/validate chkinterval */
0063 #define DEFAULT_CHECK_INTERVAL      5 /**< Default lock refresh interval; NOTE(review): not referenced in the visible code */
0064 #define DEFAULT_SVCTOUT             3 /**< Default timeout for svc to disk fallback */
0065 /*---------------------------Enums--------------------------------------*/
0066 /*---------------------------Typedefs-----------------------------------*/
0067 /*---------------------------Globals------------------------------------*/
0068 /*---------------------------Statics------------------------------------*/
0069 expublic ndrx_exsinglesv_conf_t ndrx_G_exsinglesv_conf; /**< Process configuration, zeroed and filled in by tpsvrinit() */
0070 /*---------------------------Prototypes---------------------------------*/
0071 
0072 /**
0073  * Load error code to UBF buffer
0074  * @param p_ub UBF buffer
0075  * @param error_code error code
0076  * @param fmt format string
0077  */
0078 expublic void ndrx_exsinglesv_set_error_fmt(UBFH *p_ub, long error_code, const char *fmt, ...)
0079 {
0080     char msg[MAX_TP_ERROR_LEN+1] = {EXEOS};
0081     va_list ap;
0082 
0083     va_start(ap, fmt);
0084     (void) vsnprintf(msg, sizeof(msg), fmt, ap);
0085     va_end(ap);
0086 
0087     if (TPMINVAL==error_code)
0088     {
0089         TP_LOG(log_debug, "approving request %ld: [%s]", error_code, msg);
0090     }
0091     else
0092     {
0093         TP_LOG(log_error, "Setting error %ld: [%s]", error_code, msg);
0094     }
0095 
0096     Bchg(p_ub, EX_TPERRNO, 0, (char *)&error_code, 0L);
0097     Bchg(p_ub, EX_TPSTRERROR, 0, msg, 0L);
0098 }
0099 
0100 /**
0101  * Do initialization
0102  * Have a local MIB & shared MIB
0103  */
0104 int NDRX_INTEGRA(tpsvrinit)(int argc, char **argv)
0105 {
0106     int ret=EXSUCCEED;
0107     ndrx_inicfg_t *cfg = NULL;
0108     ndrx_inicfg_section_keyval_t *params = NULL;
0109     ndrx_inicfg_section_keyval_t *el, *elt;
0110     char *p;
0111     int ndrx_sgrefresh;
0112     char svcnm[MAXTIDENT+1]={EXEOS};
0113 
0114     /* Only singleton server sections needed */
0115     char *sections[] = {"@singlesv",
0116                     NULL};
0117 
0118     memset(&ndrx_G_exsinglesv_conf, 0, sizeof(ndrx_G_exsinglesv_conf));
0119 
0120     /* set default: */
0121     ndrx_G_exsinglesv_conf.chkinterval = EXFAIL;
0122     ndrx_G_exsinglesv_conf.locked_wait = EXFAIL;
0123     ndrx_G_exsinglesv_conf.svc_timeout = DEFAULT_SVCTOUT;
0124     
0125     if (EXSUCCEED!=ndrx_cconfig_load_sections(&cfg, sections))
0126     {
0127         TP_LOG(log_error, "Failed to load configuration");
0128         EXFAIL_OUT(ret);
0129     }
0130 
0131     /* Load params by using ndrx_inicfg_get_subsect() */
0132     if (EXSUCCEED!=ndrx_cconfig_get_cf(cfg, PROGSECTION, &params))
0133     {
0134         TP_LOG(log_error, "Failed to load configuration section [%s]", PROGSECTION);
0135         EXFAIL_OUT(ret);
0136     }
0137 
0138     /* Iterate over params */
0139     EXHASH_ITER(hh, params, el, elt)
0140     {
0141         TP_LOG(log_info, "Param: [%s]=[%s]", el->key, el->val);
0142 
0143         /* read the params such as:
0144         [@exsinglegrp/<CCTAG>]
0145         singlegrp=4
0146         lockfile_1=/some/file.1
0147         lockfile_2=/some/file2
0148         exec_on_bootlocked=/some/script/to/exec.sh
0149         exec_on_locked=/some/script/to/exec.sh
0150         interval=3
0151         # this will make tototally to wait 6 seconds before taking over
0152         # (in case if we lock in non boot order)
0153         locked_wait=2
0154         to the global vars
0155         # number of threads for local requests
0156         # and number of threads for remote requests
0157         threads=5
0158         */
0159        
0160        /* read value from NDRX_SINGLEGRPLP env */
0161        if (0==strcmp(el->key, "lockfile_1"))
0162        {
0163             NDRX_STRCPY_SAFE(ndrx_G_exsinglesv_conf.lockfile_1, el->val);
0164        }
0165        else if (0==strcmp(el->key, "lockfile_2"))
0166        {
0167             NDRX_STRCPY_SAFE(ndrx_G_exsinglesv_conf.lockfile_2, el->val);
0168        }
0169        else if (0==strcmp(el->key, "exec_on_bootlocked"))
0170        {
0171             NDRX_STRCPY_SAFE(ndrx_G_exsinglesv_conf.exec_on_bootlocked, el->val);
0172        }
0173        else if (0==strcmp(el->key, "exec_on_locked"))
0174        {
0175             NDRX_STRCPY_SAFE(ndrx_G_exsinglesv_conf.exec_on_locked, el->val);
0176        }
0177        else if (0==strcmp(el->key, "chkinterval"))
0178        {
0179             ndrx_G_exsinglesv_conf.chkinterval = atoi(el->val);
0180        }
0181        else if (0==strcmp(el->key, "noremote"))
0182        {
0183             ndrx_G_exsinglesv_conf.noremote = atoi(el->val);
0184        }
0185        /* Number of cycles to wait to proceed with group locking in shared
0186         * memory. This is needed in case if we take over, then let other node
0187         * to kill all the processes. This shall be larger than other nodes sanity cycle
0188         * length. This setting is number of chkinterval cycles.
0189         */
0190        else if (0==strcmp(el->key, "locked_wait"))
0191        {
0192             ndrx_G_exsinglesv_conf.locked_wait = atoi(el->val);
0193        }
0194        else if (0==strcmp(el->key, "svctout"))
0195        {
0196             ndrx_G_exsinglesv_conf.svc_timeout = atoi(el->val);
0197        }
0198        else
0199        {
0200             TP_LOG(log_debug, "Unknown parameter [%s]", el->key);
0201             EXFAIL_OUT(ret);
0202        }
0203 
0204     }
0205 
0206     p=getenv(CONF_NDRX_PROCGRP_LP_NO);
0207 
0208     if (NULL==p)
0209     {
0210         TP_LOG(log_error, "Missing %s environment variable", 
0211             CONF_NDRX_PROCGRP_LP_NO);
0212         EXFAIL_OUT(ret);
0213     }
0214 
0215     ndrx_G_exsinglesv_conf.procgrp_lp_no = atoi(p);
0216 
0217     /* check is it valid against singleton groups */
0218     if (!ndrx_sg_is_valid(ndrx_G_exsinglesv_conf.procgrp_lp_no))
0219     {
0220         TP_LOG(log_error, "Invalid singleton process group number [%d], "
0221             "check %s env setting",
0222             ndrx_G_exsinglesv_conf.procgrp_lp_no, CONF_NDRX_PROCGRP_LP_NO);
0223         EXFAIL_OUT(ret);
0224     }
0225 
0226     if (EXEOS==ndrx_G_exsinglesv_conf.lockfile_1[0])
0227     {
0228         TP_LOG(log_error, "Invalid lockfile_1");
0229         EXFAIL_OUT(ret);
0230     }
0231 
0232     if (EXEOS==ndrx_G_exsinglesv_conf.lockfile_2[0])
0233     {
0234         TP_LOG(log_error, "Invalid lockfile_2");
0235         EXFAIL_OUT(ret);
0236     }
0237 
0238     if (0==strcmp(ndrx_G_exsinglesv_conf.lockfile_1, ndrx_G_exsinglesv_conf.lockfile_2))
0239     {
0240         TP_LOG(log_error, "lockfile_1 and lockfile_2 shall be different");
0241         EXFAIL_OUT(ret);
0242     }
0243 
0244     ndrx_sgrefresh = ndrx_G_libnstd_cfg.sgrefreshmax;
0245 
0246     if (0>=ndrx_G_exsinglesv_conf.chkinterval)
0247     {        
0248         ndrx_G_exsinglesv_conf.chkinterval = ndrx_sgrefresh/MIN_SGREFRESH_CEOFFICIENT;
0249 
0250         /* generate error */
0251         if (ndrx_G_exsinglesv_conf.chkinterval<=0)
0252         {
0253             TP_LOG(log_error, "Invalid value for %s env setting. "
0254                 "To use defaults, it shall be atleast %d", 
0255                 CONF_NDRX_SGREFRESH, MIN_SGREFRESH_CEOFFICIENT);
0256             userlog("Invalid value for %s env setting. "
0257                 "To use defaults, it shall be atleast %d", 
0258                 CONF_NDRX_SGREFRESH, MIN_SGREFRESH_CEOFFICIENT);
0259             EXFAIL_OUT(ret);
0260         }
0261     }
0262 
0263     if (EXFAIL==ndrx_G_exsinglesv_conf.locked_wait)
0264     {
0265         /* giver other node time to detect and shutdown 
0266          * basically if other system has default 30 sec refresh time,
0267          * then in those 30 sec they should detect that lock is expired
0268          * and shutdown all the processes. So we shall wait twice the time.
0269          */
0270         ndrx_G_exsinglesv_conf.locked_wait = ndrx_sgrefresh/ndrx_G_exsinglesv_conf.chkinterval*2;
0271     }
0272 
0273     /* Dump the configuration to the log file */
0274     TP_LOG(log_info, "svcnm = [%s]", svcnm);
0275     TP_LOG(log_info, "procgrp_lp_no=%d", ndrx_G_exsinglesv_conf.procgrp_lp_no);
0276     TP_LOG(log_info, "lockfile_1=[%s]", ndrx_G_exsinglesv_conf.lockfile_1);
0277     TP_LOG(log_info, "lockfile_2=[%s]", ndrx_G_exsinglesv_conf.lockfile_2);
0278     TP_LOG(log_info, "exec_on_bootlocked=[%s]", ndrx_G_exsinglesv_conf.exec_on_bootlocked);
0279     TP_LOG(log_info, "exec_on_locked=[%s]", ndrx_G_exsinglesv_conf.exec_on_locked);
0280     
0281     /* Key timing configuration: */
0282     TP_LOG(log_info, "ndrx_sgrefresh=%d", ndrx_sgrefresh);
0283     TP_LOG(log_info, "chkinterval=%d", ndrx_G_exsinglesv_conf.chkinterval);
0284     TP_LOG(log_info, "locked_wait=%d (number of chkinterval cycles)", 
0285         ndrx_G_exsinglesv_conf.locked_wait);
0286     TP_LOG(log_info, "svc_timeout=%d", ndrx_G_exsinglesv_conf.svc_timeout);
0287     TP_LOG(log_info, "noremote=%d", ndrx_G_exsinglesv_conf.noremote);
0288     /* TP_LOG(log_info, "clock_tolerance=%s", ndrx_G_exsinglesv_conf.clock_tolerance); */
0289 
0290     /* Validate check interval: */
0291     if (ndrx_G_exsinglesv_conf.chkinterval*MIN_SGREFRESH_CEOFFICIENT > ndrx_sgrefresh)
0292     {
0293         TP_LOG(log_warn, "WARNING: `%s' (%d) shall be at least %d times "
0294                 "bigger than 'chkinterval' (%d)",
0295                 CONF_NDRX_SGREFRESH, ndrx_sgrefresh, MIN_SGREFRESH_CEOFFICIENT,
0296                 ndrx_G_exsinglesv_conf.chkinterval);
0297         userlog("WARNING: `%s' (%d) shall be at least %d times "
0298                 "bigger than 'chkinterval' (%d)",
0299                 CONF_NDRX_SGREFRESH, ndrx_sgrefresh, MIN_SGREFRESH_CEOFFICIENT,
0300                 ndrx_G_exsinglesv_conf.chkinterval);
0301     }
0302 
0303 #ifdef NDRX_EXSINGLESV_LP
0304 
0305     /* compile state machine: */
0306     if (EXSUCCEED!=ndrx_exsinglesv_sm_comp())
0307     {
0308         TP_LOG(log_error, "Statemachine error");
0309         userlog("Statemachine error");
0310         EXFAIL_OUT(ret);
0311     }
0312     
0313     /* Register timer check.... */
0314     if (EXSUCCEED==ret &&
0315             EXSUCCEED!=tpext_addperiodcb(ndrx_G_exsinglesv_conf.chkinterval, 
0316             ndrx_exsinglesv_sm_run))
0317     {
0318         TP_LOG(log_error, "tpext_addperiodcb failed: %s",
0319                         tpstrerror(tperrno));
0320         EXFAIL_OUT(ret);
0321     }
0322 
0323     p=getenv(CONF_NDRX_RESPAWN);
0324 
0325     if (NULL!=p && 0==strcmp(p, "1"))
0326     {
0327         TP_LOG(log_warn, "Lock server respawn after the crash, "
0328             "will use locked_wait for first lock");
0329             ndrx_G_exsinglesv_conf.first_boot = EXFALSE;
0330     }
0331     else
0332     {
0333         ndrx_G_exsinglesv_conf.first_boot = EXTRUE;
0334     }
0335     /* perform first check
0336     * so that on boot, the first locked node would boot without any interruptions */
0337     if (EXSUCCEED!=ndrx_exsinglesv_sm_run())
0338     {
0339         TP_LOG(log_error, "Failed to perform lock check");
0340         EXFAIL_OUT(ret);
0341     }
0342 
0343     ndrx_G_exsinglesv_conf.first_boot = EXFALSE;
0344     /* report us as lock provider */
0345     tpext_configprocgrp_lp (ndrx_G_exsinglesv_conf.procgrp_lp_no);
0346 
0347 #elif NDRX_EXSINGLESV_CHK_LOCAL
0348 
0349     /* call server for results */
0350     snprintf(svcnm, sizeof(svcnm), NDRX_SVC_SGLOC, 
0351         tpgetnodeid(), ndrx_G_exsinglesv_conf.procgrp_lp_no);
0352 
0353     /* advertise our service ... */
0354     if (EXSUCCEED!=tpadvertise(svcnm, SGLOC))
0355     {
0356         TP_LOG(log_error, "Failed to advertise service [%s]: %s", 
0357             svcnm, tpstrerror(tperrno));
0358         EXFAIL_OUT(ret);
0359     }
0360 #else
0361     /* call server for results */
0362     snprintf(svcnm, sizeof(svcnm), NDRX_SVC_SGREM, 
0363         tpgetnodeid(), ndrx_G_exsinglesv_conf.procgrp_lp_no);
0364 
0365     /* advertise our service ... */
0366     if (EXSUCCEED!=tpadvertise(svcnm, SGREM))
0367     {
0368         TP_LOG(log_error, "Failed to advertise service [%s]: %s", 
0369             svcnm, tpstrerror(tperrno));
0370         EXFAIL_OUT(ret);
0371     }
0372 #endif
0373 
0374 out:
0375 
0376     if (NULL!=params)
0377     {
0378         ndrx_keyval_hash_free(params);
0379     }
0380 
0381     if (NULL!=cfg)
0382     {
0383         ndrx_inicfg_free(cfg);
0384     }
0385 
0386     return ret;
0387 }
0388 
0389 void NDRX_INTEGRA(tpsvrdone)(void)
0390 {
0391 
0392 #ifdef NDRX_EXSINGLESV_LP
0393     /* unlock the group from SHM and unlock / close the lockfiles */
0394     ndrx_exsinglesv_uninit(EXTRUE, EXFALSE);
0395 #endif
0396     
0397 }
0398 
0399 /* vim: set ts=4 sw=4 et smartindent: */