Back to home page

Enduro/X

 
 

    


0001 /**
0002  * @brief Locking Finite State Machine
0003  *
0004  * @file locksm.c
0005  */
0006 /* -----------------------------------------------------------------------------
0007  * Enduro/X Middleware Platform for Distributed Transaction Processing
0008  * Copyright (C) 2009-2016, ATR Baltic, Ltd. All Rights Reserved.
0009  * Copyright (C) 2017-2023, Mavimax, Ltd. All Rights Reserved.
0010  * This software is released under one of the following licenses:
0011  * AGPL (with Java and Go exceptions) or Mavimax's license for commercial use.
0012  * See LICENSE file for full text.
0013  * -----------------------------------------------------------------------------
0014  * AGPL license:
0015  *
0016  * This program is free software; you can redistribute it and/or modify it under
0017  * the terms of the GNU Affero General Public License, version 3 as published
0018  * by the Free Software Foundation;
0019  *
0020  * This program is distributed in the hope that it will be useful, but WITHOUT ANY
0021  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
0022  * PARTICULAR PURPOSE. See the GNU Affero General Public License, version 3
0023  * for more details.
0024  *
0025  * You should have received a copy of the GNU Affero General Public License along 
0026  * with this program; if not, write to the Free Software Foundation, Inc.,
0027  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0028  *
0029  * -----------------------------------------------------------------------------
0030  * A commercial use license is available from Mavimax, Ltd
0031  * contact@mavimax.com
0032  * -----------------------------------------------------------------------------
0033  */
0034 #include <stdio.h>
0035 #include <stdlib.h>
0036 #include <string.h>
0037 #include <errno.h>
0038 #include <regex.h>
0039 #include <utlist.h>
0040 #include <unistd.h>
0041 #include <signal.h>
0042 
0043 #include <ndebug.h>
0044 #include <atmi.h>
0045 #include <atmi_int.h>
0046 #include <typed_buf.h>
0047 #include <ndrstandard.h>
0048 #include <ubf.h>
0049 #include <ubfutil.h>
0050 #include <cconfig.h>
0051 #include <exsm.h>
0052 #include <singlegrp.h>
0053 #include <lcfint.h>
0054 #include "exsinglesv.h"
0055 #include <sys_test.h>
0056 
/*---------------------------Externs------------------------------------*/
/*---------------------------Macros-------------------------------------*/

/**
 * Maximum number of transitions a single state may declare.
 * Passed to NDRX_SM_T(), ndrx_sm_run() and ndrx_sm_comp() in this file.
 */
#define NR_TRANS        10

/**
 * Starting point of the sequence.
 * NOTE(review): not referenced anywhere in this file - confirm it is still needed.
 */
#define SEQUENCE_START  5
0061 /*---------------------------Enums--------------------------------------*/
0062 
/**
 * Events of the locking state machine.
 * Every state handler returns one of these; the value selects the
 * transition taken out of the current state.
 */
enum
{
    ev_ok,          /**< step finished fine                                   */
    ev_locked,      /**< group (get_singlegrp) / local process (chk_l_lock) is locked */
    ev_unlocked,    /**< group / local process is not locked                  */
    ev_err,         /**< step failed                                          */
    ev_wait,        /**< lock file 1 is held, but the wait period is not over */
    ev_busy,        /**< lock file is busy or maintenance mode is on          */
    ev_abort        /**< returned by the abort handlers                       */
};
0076 
/**
 * States of the locking state machine.
 * st_count is not a state, it is the number of states and is
 * used for sizing the transition table.
 */
enum
{
    st_get_singlegrp,   /**< entry: read the group lock status        */
    st_chk_l_lock,      /**< group is locked, check the local flags   */
    st_chk_l_unlock,    /**< Was local unlocked?                      */
    st_ping_lock,       /**< ping, taken when local process is locked */
    st_ping_wait,       /**< ping, taken during the wait period       */
    st_shm_refresh,     /**< write time-stamp/sequence to shared mem  */
    st_abort,           /**< close the locks, fail                    */
    st_abort_unlock,    /**< close the locks, unlock shared mem, fail */
    st_chk_mmon,        /**< maintenance mode check                   */
    st_do_lock,         /**< try to acquire the lock                  */
    st_count            /**< number of states                         */
};
0094 
0095 /*---------------------------Typedefs-----------------------------------*/
0096 NDRX_SM_T(ndrx_locksm_t, NR_TRANS);
0097 
0098 /*---------------------------Globals------------------------------------*/
0099 /*---------------------------Statics------------------------------------*/
0100 ndrx_locksm_ctx_t M_ctx; /**< Context for SM */
0101 /*---------------------------Prototypes---------------------------------*/
0102 
0103 exprivate int get_singlegrp(void *ctx);
0104 exprivate int chk_l_lock(void *ctx);
0105 exprivate int chk_l_unlock(void *ctx);
0106 exprivate int chk_mmon(void *ctx);
0107 exprivate int do_lock(void *ctx);
0108 exprivate int ping_lock(void *ctx);
0109 exprivate int shm_refresh(void *ctx);
0110 exprivate int do_abort(void *ctx);
0111 exprivate int do_abort_unlock(void *ctx);
0112 
0113 ndrx_locksm_t M_locksm[st_count] = {
0114 
0115     NDRX_SM_STATE( st_get_singlegrp, get_singlegrp,
0116           NDRX_SM_TRAN      (ev_locked,     st_chk_l_lock)
0117         , NDRX_SM_TRAN      (ev_unlocked,   st_chk_l_unlock)
0118         , NDRX_SM_TRAN      (ev_err,        st_abort)
0119         , NDRX_SM_TRAN_END
0120         )
0121     , NDRX_SM_STATE(st_chk_l_lock, chk_l_lock,
0122           NDRX_SM_TRAN      (ev_locked,     st_ping_lock)
0123         , NDRX_SM_TRAN      (ev_unlocked,   st_abort_unlock)
0124         , NDRX_SM_TRAN_END
0125         )
0126     , NDRX_SM_STATE(st_chk_l_unlock, chk_l_lock,
0127           NDRX_SM_TRAN      (ev_locked,     st_abort)
0128         , NDRX_SM_TRAN      (ev_wait,       st_ping_wait)
0129         , NDRX_SM_TRAN      (ev_unlocked,   st_chk_mmon)
0130         , NDRX_SM_TRAN_END
0131         )
0132     , NDRX_SM_STATE(st_ping_wait, ping_lock,
0133           NDRX_SM_TRAN      (ev_ok,         st_do_lock)
0134         , NDRX_SM_TRAN      (ev_err,        st_abort_unlock)
0135         , NDRX_SM_TRAN_END
0136         )
0137     , NDRX_SM_STATE(st_ping_lock, ping_lock,
0138           NDRX_SM_TRAN      (ev_ok,         st_shm_refresh)
0139         , NDRX_SM_TRAN      (ev_err,        st_abort_unlock)
0140         , NDRX_SM_TRAN_END
0141         )
0142     , NDRX_SM_STATE(st_shm_refresh, shm_refresh,
0143           NDRX_SM_TRAN      (ev_ok,         NDRX_SM_ST_RETURN0)
0144         , NDRX_SM_TRAN      (ev_err,        st_abort_unlock)
0145         , NDRX_SM_TRAN_END
0146         )
0147     , NDRX_SM_STATE(st_abort, do_abort,
0148           NDRX_SM_TRAN      (ev_abort,      NDRX_SM_ST_RETURN)
0149         , NDRX_SM_TRAN_END
0150         )
0151     , NDRX_SM_STATE(st_abort_unlock, do_abort_unlock,
0152           NDRX_SM_TRAN      (ev_abort,      NDRX_SM_ST_RETURN)
0153         , NDRX_SM_TRAN_END
0154         )
0155     , NDRX_SM_STATE(st_chk_mmon, chk_mmon,
0156           NDRX_SM_TRAN      (ev_ok,         st_do_lock)
0157         , NDRX_SM_TRAN      (ev_busy,       NDRX_SM_ST_RETURN0)
0158         , NDRX_SM_TRAN_END
0159         )
0160     , NDRX_SM_STATE(st_do_lock, do_lock,
0161           NDRX_SM_TRAN      (ev_ok,         NDRX_SM_ST_RETURN0)
0162         , NDRX_SM_TRAN      (ev_busy,       NDRX_SM_ST_RETURN0)
0163         , NDRX_SM_TRAN      (ev_wait,       NDRX_SM_ST_RETURN0)
0164         , NDRX_SM_TRAN      (ev_err,        st_abort_unlock)
0165         , NDRX_SM_TRAN_END
0166         )
0167 };
0168 
0169 /**
0170  * Read group entry, get ptr to SHM
0171  * and also get the local copy.
0172  * Calculate next refresh time.
0173  */
0174 exprivate int get_singlegrp(void *ctx)
0175 {
0176     ndrx_locksm_ctx_t *lock_ctx = (ndrx_locksm_ctx_t *)ctx;
0177     int ret, lock_status;
0178     struct timespec ts;
0179 
0180     /* get new time-stamp. Shall be done as first step, so that if
0181      * system freezes happens during the work of the binary
0182      * we would write expired time-stamp to shared memory
0183      * and that would cause to group to unlock
0184      */
0185     ndrx_realtime_get(&ts);
0186     lock_ctx->new_refresh = ts.tv_sec;
0187     
0188     lock_status = ndrx_exsinglesv_sg_is_locked(lock_ctx, EXTRUE);
0189 
0190     if (EXFAIL==lock_status)
0191     {
0192         ret=ev_err;
0193         goto out;
0194     }
0195 
0196     TP_LOG(log_debug, "Current group %d lock status: %d", 
0197         ndrx_G_exsinglesv_conf.procgrp_lp_no, lock_status);
0198 
0199     /* determine the current state of the group */
0200     switch(lock_status)
0201     {
0202         case EXTRUE:
0203             ret = ev_locked;
0204             break;
0205         default:
0206             ret = ev_unlocked;
0207             break;
0208     }
0209 
0210 out:
0211     return ret;
0212 }
0213 
0214 /**
0215  * Check if local process is locked
0216  */
0217 exprivate int chk_l_lock(void *ctx)
0218 {
0219     ndrx_locksm_ctx_t *lock_ctx = (ndrx_locksm_ctx_t *)ctx;
0220     int ret;
0221 
0222     if (ndrx_G_exsinglesv_conf.is_locked)
0223     {
0224         /* local process is locked */
0225         ret = ev_locked;
0226     }
0227     else if (ndrx_G_exsinglesv_conf.locked1)
0228     {
0229         /* doign recovery start */
0230         ret = ev_wait;
0231     }
0232     else
0233     {
0234         /* local process is unlocked */
0235         ret = ev_unlocked;
0236     }
0237 
0238     return ret;
0239 }
0240 
0241 /**
0242  * Peform ping lock. In case if
0243  * lock2 is locked, unlock
0244  * in case if lock2 is unlocked, lock
0245  */
0246 exprivate int ping_lock(void *ctx)
0247 {
0248     ndrx_locksm_ctx_t *lock_ctx = (ndrx_locksm_ctx_t *)ctx;
0249     int ret=ev_ok;
0250     long max_seq;
0251     if ( EXSUCCEED!=ndrx_exsinglesv_file_chkpid(NDRX_LOCK_FILE_1, ndrx_G_exsinglesv_conf.lockfile_1) )
0252     {
0253         ret=ev_err;
0254         goto out;
0255     }
0256 
0257     /* generate new seq */
0258     lock_ctx->new_sequence++;
0259 
0260     TP_LOG(log_info, "New sequence: %ld", lock_ctx->new_sequence);
0261 
0262     /* refresh seq in files... */
0263     if (EXSUCCEED!=ndrx_exsinglesv_ping_do(lock_ctx))
0264     {
0265         ret=ev_err;
0266         goto out;
0267     }
0268 
0269     /* if test point is set down here unlock main file */
0270     if (ndrx_G_systest_lockloss > 0)
0271     {
0272         TP_LOG(log_error, "SYSTEST: Simulating lock loss for file 1 (sleep after %d)",
0273             ndrx_G_systest_lockloss);
0274         ndrx_exsinglesv_file_unlock(NDRX_LOCK_FILE_1);
0275         sleep(ndrx_G_systest_lockloss);
0276     }
0277     
0278 out:
0279     return ret;
0280 }
0281 
0282 /**
0283  * Write cache timestamp back to shared memory...
0284  */
0285 exprivate int shm_refresh(void *ctx)
0286 {
0287     ndrx_locksm_ctx_t *lock_ctx = (ndrx_locksm_ctx_t *)ctx;
0288     int ret=ev_ok;
0289 
0290     if (EXSUCCEED!=ndrx_sg_do_refresh(ndrx_G_exsinglesv_conf.procgrp_lp_no, NULL,
0291         tpgetnodeid(), tpgetsrvid(), lock_ctx->new_refresh, lock_ctx->new_sequence))
0292     {
0293         ret=ev_err;
0294         goto out;
0295     }
0296 out:
0297     return ret;
0298 }
0299 
0300 /**
0301  * Remove the locks and return falure
0302  */
0303 exprivate int do_abort(void *ctx)
0304 {
0305     /* just close the locks  */
0306     ndrx_exsinglesv_uninit(EXFALSE, EXFALSE);
0307 
0308     return ev_abort;
0309 }
0310 
0311 /**
0312  * Abort the service and unlock shared memory
0313  */
0314 exprivate int do_abort_unlock(void *ctx)
0315 {
0316     /* just close the locks  */
0317     ndrx_exsinglesv_uninit(EXFALSE, EXTRUE);
0318 
0319     return ev_abort;
0320 }
0321 
0322 /**
0323  * Check maintenance mode.
0324  */
0325 exprivate int chk_mmon(void *ctx)
0326 {
0327     ndrx_locksm_ctx_t *lock_ctx = (ndrx_locksm_ctx_t *)ctx;
0328     int ret=ev_ok;
0329 
0330     if (lock_ctx->local.is_mmon)
0331     {
0332         /* maintenance mode is ON */
0333         TP_LOG(log_debug, "Singleton process group %d is in maintenance mode", 
0334                 ndrx_G_exsinglesv_conf.procgrp_lp_no);
0335         ret = ev_busy;
0336     }
0337     else if (ndrx_G_shmcfg->is_mmon)
0338     {
0339         /* maintenance mode is ON */
0340         TP_LOG(log_debug, "Application is in maintenance mode");
0341         ret = ev_busy;
0342     }
0343     else
0344     {
0345         /* maintenance mode is OFF */
0346         ret = ev_ok;
0347     }
0348 
0349     return ret;
0350 }
0351 
0352 /**
0353  * Perform lock file locking and
0354  * mark SHM as locked (if succeed)
0355  */
0356 exprivate int do_lock(void *ctx)
0357 {
0358     ndrx_locksm_ctx_t *lock_ctx = (ndrx_locksm_ctx_t *)ctx;
0359     int ret=ev_ok;
0360     char *boot_script = NULL;
0361 
0362     if (!ndrx_G_exsinglesv_conf.locked1)
0363     {
0364         switch (ndrx_exsinglesv_file_lock(NDRX_LOCK_FILE_1, 
0365             ndrx_G_exsinglesv_conf.lockfile_1))
0366         {
0367             case NDRX_LOCKE_BUSY:
0368                 /* file is locked */
0369                 TP_LOG(log_info, "Singleton process group %d "
0370                         "is already locked (by other node)", 
0371                         ndrx_G_exsinglesv_conf.procgrp_lp_no);
0372                 ret = ev_busy;
0373                 goto out;
0374             case EXSUCCEED:
0375 
0376                 ndrx_G_exsinglesv_conf.locked1=EXTRUE;
0377 
0378                 /* update counters if locked, even if we wait afterwards
0379                  * the HB/ping will move forward, so that others would see
0380                  * that they are not locked..
0381                  */
0382 
0383                 /* extract the sequence number */
0384                 lock_ctx->new_sequence = ndrx_exsinglesv_sg_max_seq(lock_ctx);
0385 
0386                 if (EXFAIL==lock_ctx->new_sequence)
0387                 {
0388                     TP_LOG(log_error, "Failed get current sequence number");
0389                     ret=ev_err;
0390                     goto out;
0391                 }
0392 
0393                 lock_ctx->new_sequence+=G_atmi_env.sglockinc;
0394                 TP_LOG(log_warn, "New sequence number is %ld (old seq +%d)",
0395                     lock_ctx->new_sequence, G_atmi_env.sglockinc);
0396 
0397                 /* write stuff to ping */
0398                 if (EXSUCCEED!=ndrx_exsinglesv_ping_do(lock_ctx))
0399                 {
0400                     TP_LOG(log_error, "Initial ping failed");
0401                     ret=ev_err;
0402                     goto out;
0403                 }
0404 
0405                 break;
0406             default:
0407                 ret = ev_err;
0408                 goto out;  
0409         }
0410     }
0411 
0412     /* If this is first boot, lock immeditally */
0413     if (ndrx_G_exsinglesv_conf.first_boot)
0414     {
0415         ndrx_G_exsinglesv_conf.is_locked=EXTRUE;
0416     }
0417     else
0418     {
0419         long max_seq = ndrx_exsinglesv_sg_max_seq(lock_ctx);
0420 
0421         ndrx_G_exsinglesv_conf.wait_counter++;
0422 
0423         /* 
0424          * Verify that our sequence matches
0425          * (no other has booted in cluster)
0426          */
0427         if (lock_ctx->new_sequence!=max_seq)
0428         {
0429             NDRX_LOG(log_error, "ERROR: During recovery wait %d/%d, found that "
0430                     "our sequence %ld does not matches cluster seq %ld",
0431                     ndrx_G_exsinglesv_conf.wait_counter,
0432                     ndrx_G_exsinglesv_conf.locked_wait,
0433                     lock_ctx->new_sequence,
0434                         max_seq);
0435             userlog("ERROR: During recovery wait %d/%d, found that "
0436                     "our sequence %ld does not matches cluster seq %ld",
0437                     ndrx_G_exsinglesv_conf.wait_counter,
0438                     ndrx_G_exsinglesv_conf.locked_wait,
0439                     lock_ctx->new_sequence,
0440                         max_seq);
0441             ret=ev_err;
0442             goto out;
0443         }
0444     
0445         if (ndrx_G_exsinglesv_conf.wait_counter > ndrx_G_exsinglesv_conf.locked_wait)
0446         {
0447             /* we have waited enough, lock it */
0448             ndrx_G_exsinglesv_conf.is_locked=EXTRUE;
0449         }
0450         else
0451         {
0452 
0453             /* we have to wait more */
0454             TP_LOG(log_info, "Waiting after files locked %d/%d",
0455                 ndrx_G_exsinglesv_conf.wait_counter,
0456                 ndrx_G_exsinglesv_conf.locked_wait);
0457 
0458             ret=ev_wait;
0459             goto out;
0460         }
0461     }
0462 
0463     /* we are locked down here... */
0464     if (ndrx_G_exsinglesv_conf.first_boot
0465         && EXEOS!=ndrx_G_exsinglesv_conf.exec_on_bootlocked[0])
0466     {
0467         boot_script=ndrx_G_exsinglesv_conf.exec_on_bootlocked;
0468     }
0469     else if (!ndrx_G_exsinglesv_conf.first_boot
0470         && EXEOS!=ndrx_G_exsinglesv_conf.exec_on_locked[0])
0471     {
0472         boot_script=ndrx_G_exsinglesv_conf.exec_on_locked;
0473     }
0474 
0475     if (NULL!=boot_script)
0476     {
0477         /* execute boot script */
0478         TP_LOG(log_info, "Executing boot script: %s", boot_script);
0479 
0480         ret=system(boot_script);
0481 
0482         if (EXSUCCEED!=ret)
0483         {
0484             TP_LOG(log_error, "ERROR: Lock script [%s], "
0485                 "exited with %d", boot_script, ret);
0486             userlog("ERROR: Lock script [%s], "
0487                 "exited with %d", boot_script, ret);
0488             ret=ev_err;
0489             goto out;
0490         }
0491     }
0492 
0493     /* mark shm as locked by us too */
0494     TP_LOG(log_debug, "Lock to shared memory...");
0495 
0496     if (EXSUCCEED!=ndrx_sg_do_lock(ndrx_G_exsinglesv_conf.procgrp_lp_no, 
0497             tpgetnodeid(), tpgetsrvid(), (char *)(EX_PROGNAME),
0498             lock_ctx->new_refresh, lock_ctx->new_sequence))
0499     {
0500         ret=ev_err;
0501         goto out;
0502     }
0503 
0504 out:
0505     return ret;
0506 }
0507 
0508 /**
0509  * Run lock/check statemachine
0510  * @return 0 (on success), otherwise fail
0511  */
0512 expublic int ndrx_exsinglesv_sm_run(void)
0513 {
0514     return ndrx_sm_run((void *)M_locksm, NR_TRANS, st_get_singlegrp, 
0515         (void *)&M_ctx, LOG_FACILITY_TP);
0516 }
0517 
0518 /**
0519  * Runtime compilation of the state machine
0520  * @return 0 (on success), otherwise fail
0521  */
0522 expublic int ndrx_exsinglesv_sm_comp(void)
0523 {
0524     memset(&M_ctx, 0, sizeof(M_ctx));
0525     return ndrx_sm_comp((void *)M_locksm, st_count, NR_TRANS, st_do_lock);
0526 }
0527 /* vim: set ts=4 sw=4 et smartindent: */