Back to home page

Enduro/X

 
 

    


0001 /**
0002  * @brief System utilities
0003  *
0004  * @file sysutil.c
0005  */
0006 /* -----------------------------------------------------------------------------
0007  * Enduro/X Middleware Platform for Distributed Transaction Processing
0008  * Copyright (C) 2009-2016, ATR Baltic, Ltd. All Rights Reserved.
0009  * Copyright (C) 2017-2023, Mavimax, Ltd. All Rights Reserved.
0010  * This software is released under one of the following licenses:
0011  * AGPL (with Java and Go exceptions) or Mavimax's license for commercial use.
0012  * See LICENSE file for full text.
0013  * -----------------------------------------------------------------------------
0014  * AGPL license:
0015  *
0016  * This program is free software; you can redistribute it and/or modify it under
0017  * the terms of the GNU Affero General Public License, version 3 as published
0018  * by the Free Software Foundation;
0019  *
0020  * This program is distributed in the hope that it will be useful, but WITHOUT ANY
0021  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
0022  * PARTICULAR PURPOSE. See the GNU Affero General Public License, version 3
0023  * for more details.
0024  *
0025  * You should have received a copy of the GNU Affero General Public License along 
0026  * with this program; if not, write to the Free Software Foundation, Inc.,
0027  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0028  *
0029  * -----------------------------------------------------------------------------
0030  * A commercial use license is available from Mavimax, Ltd
0031  * contact@mavimax.com
0032  * -----------------------------------------------------------------------------
0033  */
0034 #include <ndrx_config.h>
0035 #include <string.h>
0036 #include <stdio.h>
0037 #include <stdint.h>
0038 #include <stdlib.h>
0039 #include <unistd.h>
0040 #include <memory.h>
0041 #include <errno.h>
0042 #include <dlfcn.h>
0043 #include <sys/mman.h>
0044 #include <fcntl.h>
0045 #include <signal.h>
0046 
0047 #include <ndrstandard.h>
0048 #include <exhash.h>
0049 #include <ndebug.h>
0050 #include <ndrxdcmn.h>
0051 #include <userlog.h>
0052 #include <ubf.h>
0053 #include <ubfutil.h>
0054 #include <sys_unix.h>
0055 #include <sys_mqueue.h>
0056 #include <utlist.h>
0057 #include <atmi_shm.h>
0058 #include <exregex.h>
0059 #include <sys/msg.h>
0060 #include <sys/sem.h>
0061 #include <sys/shm.h>
0062 #include <cpm.h>
0063 #include "atmi_tls.h"
0064 /*---------------------------Externs------------------------------------*/
0065 /*---------------------------Macros-------------------------------------*/
0066 /*---------------------------Enums--------------------------------------*/
0067 /*---------------------------Typedefs-----------------------------------*/
0068 
0069 /**
0070  * List of queues (for queued messages)
0071  */
0072 typedef struct qcache_hash qcache_hash_t;
0073 struct qcache_hash
0074 {
0075     char svcq[NDRX_MAX_Q_SIZE+1]; /* hash by this */
0076     char svcq_full[NDRX_MAX_Q_SIZE+1]; /* full queue name */
0077     
0078     EX_hash_handle hh; /* makes this structure hashable        */
0079 };
0080 
0081 exprivate MUTEX_LOCKDECL(M_q_cache_lock); /* lock the queue cache */
0082 /*---------------------------Globals------------------------------------*/
0083 /*---------------------------Statics------------------------------------*/
0084 
0085 exprivate qcache_hash_t *M_qcache = NULL; /* queue cache for non shm mode. */
0086 
0087 /**
0088  * Check is server running
0089  */
0090 expublic int ndrx_chk_server(char *procname, short srvid)
0091 {
0092     int ret = EXFALSE;
0093     char test_string3[NDRX_MAX_KEY_SIZE+4];
0094     char test_string4[64];
0095     string_list_t * list;
0096      
0097     snprintf(test_string3, sizeof(test_string3), "-k %s", G_atmi_env.rnd_key);
0098     snprintf(test_string4, sizeof(test_string4), "-i %hd", srvid);
0099     
0100     list =  ndrx_sys_ps_list(ndrx_sys_get_cur_username(), procname, 
0101             test_string3, test_string4, "");
0102     
0103     if (NULL!=list)
0104     {
0105         NDRX_LOG(log_debug, "process %s -i %hd running ok", procname, srvid);
0106         ret = EXTRUE;
0107     }
0108     else
0109     {
0110         NDRX_LOG(log_debug, "process %s -i %hd not running...", procname, srvid);
0111     }
0112     
0113     
0114     ndrx_string_list_free(list);
0115    
0116     return ret;
0117 }
0118 
0119 
0120 /**
0121  * Check is `ndrxd' daemon running
0122  */
0123 expublic int ndrx_chk_ndrxd(void)
0124 {
0125     int ret = EXFALSE;
0126     char test_string3[NDRX_MAX_KEY_SIZE+4];
0127     string_list_t * list;
0128      
0129     snprintf(test_string3, sizeof(test_string3), "-k %s", G_atmi_env.rnd_key);
0130     
0131     list =  ndrx_sys_ps_list(ndrx_sys_get_cur_username(), "ndrxd", 
0132             test_string3, "", "");
0133     
0134     if (NULL!=list)
0135     {
0136         NDRX_LOG(log_debug, "process `ndrxd' running ok");
0137         ret = EXTRUE;
0138     }
0139     else
0140     {
0141         NDRX_LOG(log_debug, "process `ndrxd' not running...");
0142     }
0143     
0144     ndrx_string_list_free(list);
0145     
0146     return ret;
0147 }
0148 
0149 /**
0150  * Return PID of ndrxd
0151  * @return PID of ndrxd or EXFAIL
0152  */
0153 expublic pid_t ndrx_ndrxd_pid_get(void)
0154 {
0155     pid_t ret = EXFAIL;
0156     FILE *f = NULL;
0157     char    pidbuf[64] = {EXEOS};
0158     
0159      if (NULL==(f=NDRX_FOPEN(G_atmi_env.ndrxd_pidfile, "r")))
0160     {
0161 
0162         if (ENOENT==errno)
0163         {
0164             NDRX_LOG(log_debug, "ndrxd PID file [%s] not found", 
0165                     G_atmi_env.ndrxd_pidfile);
0166         }
0167         else
0168         {
0169             NDRX_LOG(log_error, "Failed to open ndrxd PID file: [%s]: %s",
0170                     G_atmi_env.ndrxd_pidfile, strerror(errno));
0171         }
0172 
0173         goto out;
0174     }
0175 
0176      /* Read the PID */
0177     if (NULL==fgets(pidbuf, sizeof(pidbuf), f))
0178     {
0179         NDRX_LOG(log_error, "Failed to read from PID file: [%s]: %s",
0180                 G_atmi_env.ndrxd_pidfile, strerror(errno));
0181         goto out;
0182     }
0183     ret = atoi(pidbuf);
0184     
0185 out:
0186     
0187     if (NULL!=f)
0188     {
0189         NDRX_FCLOSE(f);
0190         f = NULL;
0191     }
0192     
0193     return ret;
0194 }
0195 
0196 /**
0197  * Prase client queue
0198  * @param pfx
0199  * @param proc
0200  * @param pid
0201  * @param th
0202  * @return 
0203  */
0204 expublic int ndrx_parse_clt_q(char *q, char *pfx, char *proc, pid_t *pid, long *th)
0205 {
0206     char tmp[NDRX_MAX_Q_SIZE+1];
0207     char *token;
0208     int ret = EXSUCCEED;
0209     
0210     pfx[0] = EXEOS;
0211     proc[0] = EXEOS;
0212     *pid = 0;
0213     *th = 0;
0214 
0215     if (NULL==strstr(q, NDRX_CLT_QREPLY_CHK))
0216     {
0217         NDRX_LOG(log_debug, "[%s] - not client Q", q);
0218         ret = EXFAIL;
0219         goto out;
0220     }
0221             
0222     NDRX_STRCPY_SAFE(tmp, q);
0223     
0224     /* get the first token */
0225     token = strtok(tmp, NDRX_FMT_SEP_STR);
0226 
0227     if (NULL!=token)
0228     {
0229         strcpy(pfx, token);
0230     }
0231     else
0232     {
0233         NDRX_LOG(log_error, "missing pfx");
0234         EXFAIL_OUT(ret);
0235     }
0236     
0237     
0238     token = strtok(NULL, NDRX_FMT_SEP_STR);
0239     if (NULL==token)
0240     {
0241         NDRX_LOG(log_error, "missing clt");
0242         EXFAIL_OUT(ret);
0243     }
0244     
0245     token = strtok(NULL, NDRX_FMT_SEP_STR);
0246     if (NULL==token)
0247     {
0248         NDRX_LOG(log_error, "missing reply");
0249         EXFAIL_OUT(ret);
0250     }
0251     
0252     token = strtok(NULL, NDRX_FMT_SEP_STR);
0253     
0254     if (NULL!=token)
0255     {
0256         strcpy(proc, token);
0257     }
0258     else
0259     {
0260         NDRX_LOG(log_error, "missing proc name");
0261         EXFAIL_OUT(ret);
0262     }
0263     
0264     token = strtok(NULL, NDRX_FMT_SEP_STR);
0265     
0266     if (NULL!=token)
0267     {
0268         *pid=atoi(token);
0269     }
0270     else
0271     {
0272         NDRX_LOG(log_error, "missing proc pid");
0273         EXFAIL_OUT(ret);
0274     }
0275     
0276     token = strtok(NULL, NDRX_FMT_SEP_STR);
0277     
0278     if (NULL!=token)
0279     {
0280         *th=atol(token);
0281     }
0282     else
0283     {
0284         NDRX_LOG(log_error, "missing proc th");
0285         EXFAIL_OUT(ret);
0286     }
0287     
0288 out:
0289     return ret;
0290 }
0291 
0292 /**
0293  * Delete user resources (by username)
0294  */
0295 expublic void ndrx_down_userres(void)
0296 {
0297     int i;
0298     mdrx_sysv_res_t *sysvres;
0299     ndrx_growlist_t g;
0300 
0301     NDRX_LOG(log_warn, "Remove user specific resources - System V queues");
0302     memset(&g, 0, sizeof(g));
0303     if (EXSUCCEED==ndrx_sys_sysv_user_res(&g, NDRX_SV_RESTYPE_QUE))
0304     {
0305         sysvres = (mdrx_sysv_res_t *)g.mem;
0306         for (i=0; i<=g.maxindexused; i++)
0307         {
0308             NDRX_LOG(log_warn, "Removing QID=%u", sysvres[i].id);
0309             if (EXSUCCEED!=msgctl(sysvres[i].id, IPC_RMID, NULL))
0310             {
0311                 NDRX_LOG(log_error, "Failed to remove qid %u: %s",
0312                         sysvres[i].id, strerror(errno));
0313             }
0314         }
0315 
0316         ndrx_growlist_free(&g);
0317     }
0318 
0319     NDRX_LOG(log_warn, "Remove user specific resources - System V semaphores");
0320     memset(&g, 0, sizeof(g));
0321     if (EXSUCCEED==ndrx_sys_sysv_user_res(&g, NDRX_SV_RESTYPE_SEM))
0322     {
0323         sysvres = (mdrx_sysv_res_t *)g.mem;
0324         for (i=0; i<=g.maxindexused; i++)
0325         {
0326             NDRX_LOG(log_warn, "Removing SEM ID=%u", sysvres[i].id);
0327             if (EXSUCCEED!=semctl(sysvres[i].id, 0, IPC_RMID))
0328             {
0329                 NDRX_LOG(log_error, "Failed to remove sem id %u: %s",
0330                         sysvres[i].id, strerror(errno));
0331             }
0332         }
0333         ndrx_growlist_free(&g);
0334     }
0335     
0336     
0337     NDRX_LOG(log_warn, "Remove user specific resources - System V shard mem");
0338     memset(&g, 0, sizeof(g));
0339     if (EXSUCCEED==ndrx_sys_sysv_user_res(&g, NDRX_SV_RESTYPE_SHM))
0340     {
0341         sysvres = (mdrx_sysv_res_t *)g.mem;
0342         for (i=0; i<=g.maxindexused; i++)
0343         {
0344             NDRX_LOG(log_warn, "Removing SHM ID=%u", sysvres[i].id);
0345             if (EXSUCCEED!=shmctl(sysvres[i].id, IPC_RMID, NULL))
0346             {
0347                 NDRX_LOG(log_error, "Failed to remove sem id %u: %s",
0348                         sysvres[i].id, strerror(errno));
0349             }
0350         }
0351         ndrx_growlist_free(&g);
0352     }
0353     
0354     return;
0355 }
0356 
0357 
0358 /**
0359  * Kill the system running (the xadmin dies last...)
0360  * @param[in] user_res remove user specific resources (this might kill other apps
0361  *  resources too if running under the same user). Currently performs System V
0362  *  resource removal.
0363  */
0364 expublic int ndrx_down_sys(char *qprefix, char *qpath, int is_force, int user_res)
0365 {
0366     int ret = EXSUCCEED;
0367 #define DOWN_KILL_SIG   1
0368     int signals[] = {SIGTERM, SIGKILL, EXFAIL};
0369     int i;
0370     string_list_t* qlist = NULL;
0371     string_list_t* srvlist = NULL;
0372     string_list_t* srvlist2 = NULL;
0373     string_list_t* ndrxdlist = NULL;
0374     string_list_t* cpmsrvs = NULL;
0375     string_list_t* xadminlist = NULL;
0376     string_list_t* cltchildren = NULL;
0377     string_list_t* elt = NULL;
0378     string_list_t* elt2 = NULL;
0379     string_list_t* qclts = NULL;
0380     char pfx[NDRX_MAX_Q_SIZE+1];
0381     char proc[NDRX_MAX_Q_SIZE+1];
0382     pid_t pid, ppid;
0383     long th;
0384     char test_string2[NDRX_MAX_KEY_SIZE+4];
0385     
0386     /* probably we can configure these directly to use IDS: */
0387     char srvinfo[NDRX_SHM_PATH_MAX];
0388     char svcinfo[NDRX_SHM_PATH_MAX];
0389     char brinfo[NDRX_SHM_PATH_MAX];
0390 
0391     char routcrit[NDRX_SHM_PATH_MAX];
0392     char routsvc[NDRX_SHM_PATH_MAX];
0393 
0394     char *shm[] = {srvinfo, svcinfo, brinfo, routcrit, routsvc};
0395     char *ndrxd_pid_file = getenv(CONF_NDRX_DPID);
0396     int max_signals = 2;
0397     int was_any = EXFALSE;
0398     pid_t my_pid = getpid();
0399     char *username;
0400     char env_mask[PATH_MAX];
0401     regex_t srv2rex;
0402     int srv2rex_compiled = EXFALSE;
0403     NDRX_LOG(log_warn, "****** Forcing system down ******");
0404     
0405     
0406     snprintf(srvinfo, sizeof(srvinfo), NDRX_SHM_SRVINFO, qprefix);
0407     snprintf(svcinfo, sizeof(svcinfo), NDRX_SHM_SVCINFO, qprefix);
0408     snprintf(brinfo, sizeof(brinfo),  NDRX_SHM_BRINFO,  qprefix);
0409     
0410     /* how about posix ? shms? remove too...
0411      * as processes will detach and finally forget about them
0412      */
0413     snprintf(routcrit, sizeof(routcrit),NDRX_SHM_ROUTCRIT,  qprefix);
0414     snprintf(routsvc, sizeof(routsvc),  NDRX_SHM_ROUTSVC,  qprefix);
0415     
0416     snprintf(test_string2, sizeof(test_string2), "-k %s", G_atmi_env.rnd_key);
0417     
0418 
0419     if (is_force)
0420     {
0421         signals[0] = SIGKILL;
0422     }
0423     
0424     /* list all queues */
0425     qlist = ndrx_sys_mqueue_list_make(qpath, &ret);
0426     
0427     if (EXSUCCEED!=ret)
0428     {
0429         NDRX_LOG(log_error, "posix queue listing failed... continue...!");
0430         ret = EXSUCCEED;
0431         qlist = NULL;
0432     }
0433     
0434     username = ndrx_sys_get_cur_username();
0435     
0436     /* THIS IS FIRST!!!! We do not want continues respawning!!!
0437      * kill any ndrxd, as they can respawn xatmi servers 
0438      * But... tprecover might running and restoring ndrxd back
0439      * thus we need somehow in loop kill both with -9
0440      */
0441     NDRX_LOG(log_debug, "Killing the ndrxd and tprecover...");
0442     do
0443     {
0444         was_any = EXFALSE;
0445         
0446         ndrxdlist = ndrx_sys_ps_list(username, test_string2, 
0447                 "", "", "[\\s/ ]*ndrxd[\\s ]");
0448         
0449         srvlist = ndrx_sys_ps_list(username, test_string2, 
0450                 "", "", "[\\s/ ]*tprecover[\\s ]");
0451         
0452         LL_FOREACH(ndrxdlist,elt)
0453         {
0454             if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid))
0455             {
0456                  NDRX_LOG(log_error, "! killing (ndrxd)  sig=%d "
0457                          "pid=[%d] (%s)", signals[DOWN_KILL_SIG], pid, elt->qname);
0458 
0459                  if (EXSUCCEED!=kill(pid, signals[DOWN_KILL_SIG]))
0460                  {
0461                      NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0462                              signals[i], pid, strerror(errno));
0463                  }
0464                  else
0465                  {
0466                     was_any = EXTRUE;
0467                  }
0468             }
0469         }
0470         
0471         LL_FOREACH(srvlist,elt)
0472         {
0473             if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid))
0474             {
0475                  NDRX_LOG(log_error, "! killing (tprecover)  sig=%d "
0476                          "pid=[%d] (%s)", signals[DOWN_KILL_SIG], pid, elt->qname);
0477 
0478                  if (EXSUCCEED!=kill(pid, signals[DOWN_KILL_SIG]))
0479                  {
0480                      NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0481                              signals[i], pid, strerror(errno));
0482                  }
0483                  else
0484                  {
0485                     was_any = EXTRUE;
0486                  }
0487             }
0488         }
0489         
0490         ndrx_string_list_free(ndrxdlist);
0491         ndrxdlist = NULL;
0492         ndrx_string_list_free(srvlist);
0493         srvlist = NULL;
0494         
0495     } while(was_any);
0496     
0497     /* Get the pid of cpmsrv - from queue, I guess or from PS output..? */
0498     
0499     /* 
0500      * List child processes of the cpmsrv
0501      * and kill the cpmsrv and the child processes
0502      */
0503     NDRX_LOG(log_debug, "Searching child processes of the cpmsrv");
0504             
0505     cpmsrvs = ndrx_sys_ps_list(username, test_string2, 
0506                 "", "", "[\\s/ ]*cpmsrv[\\s ]");
0507     
0508     LL_FOREACH(cpmsrvs,elt2)
0509     {
0510         /* List the children of the cpmsrv... */
0511         if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt2->qname, &ppid))
0512         {
0513             
0514             NDRX_LOG(log_warn, "CPMSRV PID = %d, extracting children", ppid);
0515             
0516             qclts = ndrx_sys_ps_getchilds(ppid);
0517             was_any = EXFALSE;
0518             
0519             NDRX_LOG(log_warn, "! Children extracted, about kill the cpmsrv...");
0520             /* At this moment we must kill the CPM, as it will spawn the children 
0521              * The children list extract and parent can be killed
0522              */
0523             if (EXSUCCEED!=kill(ppid, signals[0]))
0524             {
0525                 NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0526                         signals[0], ppid, strerror(errno));
0527             }
0528 
0529             sleep(EX_KILL_SLEEP_SECS);
0530             
0531             if (EXSUCCEED!=kill(ppid, signals[1]))
0532             {
0533                 NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0534                         signals[1], ppid, strerror(errno));
0535             }
0536             
0537             NDRX_LOG(log_warn, "Now kill the child processes one by one");
0538             for (i=0; i<max_signals; i++)
0539             {
0540                 LL_FOREACH(qclts,elt)
0541                 {
0542                     /* Parse out process name & pid */
0543                     NDRX_LOG(log_warn, "processing proc: [%s]", elt->qname);
0544 
0545                     if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid))
0546                     {
0547                         if (0==i)
0548                         {
0549                             ndrx_proc_children_get_recursive(&cltchildren, pid);
0550                         }
0551 
0552                         NDRX_LOG(log_error, "! killing  sig=%d "
0553                                 "pid=[%d] mypid=[%d]", signals[i], pid, my_pid);
0554 
0555                         if (EXSUCCEED!=kill(pid, signals[i]))
0556                         {
0557                             NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0558                                     signals[i], pid, strerror(errno));
0559                         }
0560                         was_any = EXTRUE;
0561                     }
0562                 }
0563                 
0564                 if (0==i && was_any)
0565                 {
0566                     sleep(EX_KILL_SLEEP_SECS);
0567                 }
0568             }
0569             
0570             ndrx_string_list_free(qclts);
0571             qclts = NULL;
0572             
0573             /* kill the children of the children */
0574             ndrx_proc_kill_list(cltchildren);
0575             ndrx_string_list_free(cltchildren);
0576             
0577             cltchildren = NULL;
0578             
0579             qclts = NULL;
0580         }
0581         else
0582         {
0583             NDRX_LOG(log_error, "Failed to extract pid from: [%s]", elt->qname);
0584         }
0585     }
0586     
0587     /* remove the CPM/CLT SHM & clts */
0588     ndrx_cltshm_down(signals, &was_any);
0589     
0590     /* 
0591      * kill all servers 
0592      */
0593     was_any = EXFALSE;
0594     NDRX_LOG(log_warn, "Removing server processes for user [%s] and key [%s]", 
0595         username, test_string2);
0596     
0597     srvlist = ndrx_sys_ps_list(username, test_string2, 
0598                 "", "", "");
0599     
0600     for (i=0; i<max_signals; i++)
0601     {
0602         LL_FOREACH(srvlist,elt)
0603         {
0604             /* Parse out process name & pid */
0605             NDRX_LOG(log_warn, "processing proc: [%s]", elt->qname);
0606             
0607             if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid))
0608             {
0609                  NDRX_LOG(log_error, "! killing  sig=%d "
0610                          "pid=[%d] (%s)", signals[i], pid, elt->qname);
0611                  
0612                  if (EXSUCCEED!=kill(pid, signals[i]))
0613                  {
0614                      NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0615                              signals[i], pid, strerror(errno));
0616                  }
0617                  was_any = EXTRUE;
0618             }
0619         }
0620         if (0==i && was_any)
0621         {
0622             sleep(EX_KILL_SLEEP_SECS);
0623         }
0624     }
0625 
0626     /* Kill servers by looking up environment variables!!!
0627      * needs to implement API calls for linux/mac/freebsd/aix/solaris
0628      */
0629     
0630     was_any = EXFALSE;
0631     
0632     snprintf(env_mask, sizeof(env_mask), "%s.{0,2}%s.*-i [0-9]+.*--",
0633                 CONF_NDRX_SVCLOPT, test_string2);
0634     
0635     /* Compile regex */
0636     if (EXSUCCEED!=ndrx_regcomp(&srv2rex, env_mask))
0637     {
0638         NDRX_LOG(log_error, "Failed to compile regexp [%s]", env_mask);
0639         EXFAIL_OUT(ret);
0640     }
0641     srv2rex_compiled = EXTRUE;
0642     
0643     NDRX_LOG(log_warn, "Removing server processes for user [%s] and env mask [%s]", 
0644         username, test_string2);
0645     
0646     srvlist2 = ndrx_sys_ps_list(username, "", 
0647                 "", "", "");
0648             
0649     for (i=0; i<max_signals; i++)
0650     {
0651         LL_FOREACH(srvlist2,elt)
0652         {
0653             /* Parse out process name & pid */
0654             NDRX_LOG(log_warn, "processing proc: [%s]", elt->qname);
0655             
0656             if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid) &&
0657                     EXTRUE==ndrx_sys_env_test(pid, &srv2rex))
0658             {
0659                  NDRX_LOG(log_error, "! killing  sig=%d "
0660                          "pid=[%d] (%s)", signals[i], pid, elt->qname);
0661                  
0662                  if (EXSUCCEED!=kill(pid, signals[i]))
0663                  {
0664                      NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0665                              signals[i], pid, strerror(errno));
0666                  }
0667                  was_any = EXTRUE;
0668             }
0669         }
0670         if (0==i && was_any)
0671         {
0672             sleep(EX_KILL_SLEEP_SECS);
0673         }
0674     }
0675     
0676     NDRX_LOG(log_warn, "Removing all client processes.. (by Q)");
0677     /* Kill the children against the Q 
0678      * DO this after the servers. So that servers have no chance to respawn
0679      * any client
0680      */
0681     for (i=0; i<max_signals; i++)
0682     {
0683         LL_FOREACH(qlist,elt)
0684         {
0685             NDRX_LOG(log_debug, "Testing q [%s]", elt->qname);
0686             
0687             /* if not print all, then skip this queue */
0688             if (0!=strncmp(elt->qname, 
0689                     qprefix, strlen(qprefix)))
0690             {
0691                 continue;
0692             }
0693             
0694             /* Parse out process name & pid */
0695             NDRX_LOG(log_warn, "processing q: [%s]", elt->qname);
0696             
0697             if (EXSUCCEED==ndrx_parse_clt_q(elt->qname, pfx, proc, &pid, &th) &&
0698                     0!=strcmp(proc, "xadmin"))
0699             {
0700                 if (0==i)
0701                 {
0702                     ndrx_proc_children_get_recursive(&cltchildren, pid);
0703                 }
0704                 
0705                 NDRX_LOG(log_error, "! killing  sig=%d pfx=[%s] proc=[%s] "
0706                         "pid=[%d] th=[%ld]", signals[i], pfx, proc, pid, th);
0707                 if (EXSUCCEED!=kill(pid, signals[i]))
0708                 {
0709                     NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0710                             signals[i], pid, strerror(errno));
0711                 }
0712                 else
0713                 {
0714                    was_any = EXTRUE;
0715                 }
0716             }
0717         }
0718         
0719         if (0==i && was_any)
0720         {
0721             sleep(EX_KILL_SLEEP_SECS);
0722         }
0723     }
0724     
0725     /* kill the children of the children */
0726     ndrx_proc_kill_list(cltchildren);
0727     ndrx_string_list_free(cltchildren);
0728     cltchildren = NULL;
0729 
0730     /* remove all xadmins... */
0731     NDRX_LOG(log_warn, "Removing other xadmins...");
0732     was_any = EXFALSE;
0733     xadminlist = ndrx_sys_ps_list(username, "xadmin", 
0734         "", "", "");
0735     
0736     for (i=0; i<max_signals; i++)
0737     {
0738         LL_FOREACH(xadminlist,elt)
0739         {
0740             /* Parse out process name & pid */
0741             NDRX_LOG(log_warn, "processing proc: [%s]", elt->qname);
0742             
0743             if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid) && pid!=my_pid)
0744             {
0745                  NDRX_LOG(log_error, "! killing  sig=%d "
0746                          "pid=[%d] mypid=[%d]", signals[i], pid, my_pid);
0747                  
0748                  if (EXSUCCEED!=kill(pid, signals[i]))
0749                  {
0750                      NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0751                              signals[i], pid, strerror(errno));
0752                  }
0753                  else
0754                  {
0755                     was_any = EXTRUE;
0756                  }
0757             }
0758         }
0759         
0760         if (0==i && was_any)
0761         {
0762             sleep(EX_KILL_SLEEP_SECS);
0763         }
0764     }
0765     
0766     /* Remove all queues */
0767     NDRX_LOG(log_warn, "Removing queues...");
0768     
0769     LL_FOREACH(qlist,elt)
0770     {
0771         /* if not print all, then skip this queue */
0772         if (0!=strncmp(elt->qname, 
0773                 qprefix, strlen(qprefix)))
0774         {
0775             continue;
0776         }
0777 
0778         /* Parse out process name & pid */
0779         NDRX_LOG(log_warn, "Removing q: [%s]", elt->qname);
0780 
0781         if (EXSUCCEED!=ndrx_mq_unlink(elt->qname))
0782         {
0783             NDRX_LOG(log_error, "failed to remove q [%s]: %s",
0784                     elt->qname, strerror(errno));
0785         }
0786     }
0787     
0788     NDRX_LOG(log_warn, "Removing shared memory...");
0789     
0790     for (i=0; i<N_DIM(shm); i++)
0791     {
0792         NDRX_LOG(log_warn, "Unlinking [%s]", shm[i]);
0793         
0794         if (EXSUCCEED!=ndrx_shm_remove_name(shm[i], G_atmi_env.ipckey))
0795         {
0796             NDRX_LOG(log_warn, "shm_unlink [%s] failed: %s (ignore)...", 
0797                     shm[i], strerror(errno));
0798         }
0799     }
0800     
0801     NDRX_LOG(log_warn, "Removing semaphores...");
0802     
0803     ndrxd_sem_delete_with_init(qprefix);
0804     
0805     NDRX_LOG(log_warn, "Removing ndrxd pid file");
0806     
0807     if (NULL!=ndrxd_pid_file && EXEOS!=ndrxd_pid_file[0])
0808     {
0809         if (EXSUCCEED!=unlink(ndrxd_pid_file))
0810         {
0811             NDRX_LOG(log_error, "Failed to unlink [%s]: %s", 
0812                     ndrxd_pid_file, strerror(errno));
0813         }
0814     }
0815     else
0816     {
0817         NDRX_LOG(log_error, "Missing ndrxd PID file...");
0818     }
0819     
0820     NDRX_LOG(log_warn, "Terminating polling sub-system");
0821     
0822     if (EXSUCCEED!=ndrx_epoll_down(EXTRUE))
0823     {
0824         NDRX_LOG(log_error, "Failed to terminate poller");
0825     }
0826     
0827     if (user_res)
0828     {
0829         ndrx_down_userres();
0830     }
0831     
0832     NDRX_LOG(log_warn, "****** Done ******");
0833     
0834 out:
0835 
0836     ndrx_string_list_free(qlist);
0837     ndrx_string_list_free(srvlist);
0838     ndrx_string_list_free(srvlist2);
0839     ndrx_string_list_free(xadminlist);
0840     ndrx_string_list_free(cpmsrvs);
0841     ndrx_string_list_free(qclts);
0842     ndrx_string_list_free(ndrxdlist);
0843     ndrx_string_list_free(cltchildren);
0844     
0845     if (srv2rex_compiled)
0846     {
0847         ndrx_regfree(&srv2rex);
0848     }
0849     
0850     return ret;
0851 }
0852 
0853 
0854 /**
0855  * Kill process by mask.
0856  * @param m
0857  * @return 
0858  */
0859 expublic int ndrx_killall(char *mask)
0860 {
0861     string_list_t* plist = NULL;
0862     string_list_t* elt = NULL;
0863     int signals[] = {SIGTERM, SIGKILL};
0864     pid_t pid, curprocpid;
0865     int was_any = EXFALSE;
0866     int i;
0867     ndrx_intmap_t *pshash = NULL, *parentshash = NULL;
0868     int ret = EXFAIL;
0869     
0870     /* list the searched values... */
0871     plist = ndrx_sys_ps_list(mask, "", "", "", "");
0872     
0873     /* build ps hash */
0874     if (EXSUCCEED!=ndrx_sys_ps_list2hash(plist, &pshash))
0875     {
0876         NDRX_LOG(log_error, "Failed to build pslist hash! Out of memory?");
0877         EXFAIL_OUT(ret);
0878     }
0879     
0880     curprocpid=getpid();
0881     /* build parents hash of current process, to protect it from suicide... */
0882     if (EXSUCCEED!=ndrx_sys_ps_hash2parents(&pshash, curprocpid, &parentshash))
0883     {
0884         NDRX_LOG(log_error, "Failed to build parents hash! Out of memory?");
0885         EXFAIL_OUT(ret);
0886     }
0887     
0888     for (i=0; i<2; i++)
0889     {
0890         LL_FOREACH(plist,elt)
0891         {
0892             /* Parse out process name & pid */
0893             NDRX_LOG(log_warn, "processing proc: [%s]", elt->qname);
0894             
0895             if (EXSUCCEED==ndrx_proc_pid_get_from_ps(elt->qname, &pid) && 
0896                     pid!=0)
0897             {
0898                 if (NULL==ndrx_intmap_find(&parentshash, pid) && pid!=curprocpid)
0899                 {
0900                     NDRX_LOG(log_error, "! killing  sig=%d "
0901                             "pid=[%d]", signals[i], pid);
0902 
0903                     if (EXSUCCEED!=kill(pid, signals[i]))
0904                     {
0905                         NDRX_LOG(log_error, "failed to kill with signal %d pid %d: %s",
0906                                 signals[i], pid, strerror(errno));
0907                     }
0908                     was_any = EXTRUE;
0909                     ret = EXSUCCEED;
0910                 }
0911                 else
0912                 {
0913                     NDRX_LOG(log_warn, "No suicide pid=%d", pid);
0914                 }
0915             }
0916         }
0917         if (0==i && was_any)
0918         {
0919             sleep(EX_KILL_SLEEP_SECS);
0920         }
0921     }
0922     
0923 out:
0924     ndrx_intmap_remove (&pshash);
0925     ndrx_intmap_remove (&parentshash);
0926     ndrx_string_list_free(plist);
0927 
0928     
0929     return ret;
0930 }
0931 
0932 /**
0933  * Checks for queue existance
0934  * @param qpath
0935  * @return TRUE/FALSE
0936  */
0937 expublic int ndrx_q_exists(char *qpath)
0938 {
0939     mqd_t tmp = ndrx_mq_open(qpath, O_RDONLY, O_NONBLOCK, NULL);
0940     
0941     if ((mqd_t)EXFAIL!=tmp)
0942     {
0943         ndrx_mq_close(tmp);
0944         return EXTRUE;
0945     }
0946     
0947     return EXFALSE;
0948 }
0949 
0950 /**
0951  * Check service in cache. If found but cannot open, then remove from cache
0952  * @param q
0953  * @return SUCCEED (found & replaced)/FAIL
0954  */
0955 exprivate int chk_cached_svc(char *svcq, char *svcq_full)
0956 {
0957     qcache_hash_t * ret = NULL;
0958    
0959     MUTEX_LOCK_V(M_q_cache_lock);
0960     
0961     EXHASH_FIND_STR( M_qcache, svcq, ret);
0962     
0963     if (NULL==ret)
0964     {
0965         NDRX_LOG(log_debug, "Service q [%s] not in cache", svcq);
0966         goto out;
0967     }
0968     else
0969     {
0970         NDRX_LOG(log_debug, "Service q [%s] found in cache, testing...", svcq);
0971         if (ndrx_q_exists(svcq))
0972         {
0973             NDRX_LOG(log_debug, "cached queue exists ok");
0974         }
0975         else
0976         {
0977             NDRX_LOG(log_warn, "Cached queue [%s] does not exists", svcq);
0978             EXHASH_DEL(M_qcache, ret);
0979             NDRX_FREE(ret);
0980             ret=NULL;
0981         }
0982     }
0983     
0984 out:
0985     MUTEX_UNLOCK_V(M_q_cache_lock);
0986     
0987     if (NULL==ret)
0988         return EXFAIL;
0989     else
0990         return EXSUCCEED;
0991 }
0992 
0993 /**
0994  * Add add full queue mapping to cache
0995  * @param q
0996  * @param fullq
0997  * @return 
0998  */
0999 exprivate int add_cached_svc(char *svcq, char *svcq_full)
1000 {
1001     qcache_hash_t * ret = NDRX_CALLOC(1, sizeof(qcache_hash_t));
1002     
1003     MUTEX_LOCK_V(M_q_cache_lock);
1004     
1005     if (NULL==ret)
1006     {
1007         NDRX_LOG(log_error, "Failed to alloc qcache_hash_t: %s", strerror(errno));
1008         userlog("Failed to alloc qcache_hash_t: %s", strerror(errno));
1009     }
1010     
1011     NDRX_STRCPY_SAFE(ret->svcq, svcq);
1012     NDRX_STRCPY_SAFE(ret->svcq_full, svcq_full);
1013     
1014     EXHASH_ADD_STR( M_qcache, svcq, ret );
1015     
1016     MUTEX_UNLOCK_V(M_q_cache_lock);
1017     
1018     if (NULL!=ret)
1019         return EXSUCCEED;
1020     else
1021         return EXFAIL;
1022 }
1023 
1024 /**
1025  * Return cached queue (usable in poll mode).
1026  * @param q
1027  * @return SUCCEED/FAIL
1028  */
1029 expublic int ndrx_get_cached_svc_q(char *q)
1030 {
1031     int ret=EXSUCCEED;
1032     int found = EXFALSE;
1033     string_list_t* qlist = NULL;
1034     string_list_t* elt = NULL;
1035     char svcq[NDRX_MAX_Q_SIZE+1];
1036     
1037     NDRX_STRCPY_SAFE(svcq, q);
1038     
1039     if (EXSUCCEED==chk_cached_svc(svcq, q))
1040     {
1041         NDRX_LOG(log_info, "Got cached service: [%s]", q);
1042         return EXSUCCEED;
1043     }
1044     
1045     qlist = ndrx_sys_mqueue_list_make(G_atmi_env.qpath, &ret);
1046     
1047     if (EXSUCCEED!=ret)
1048     {
1049         NDRX_LOG(log_error, "posix queue listing failed!");
1050         EXFAIL_OUT(ret);
1051     }
1052     
1053     strcat(q, NDRX_FMT_SEP_STR);
1054     
1055     LL_FOREACH(qlist,elt)
1056     {
1057         /* if not print all, then skip this queue */
1058         if (0==strncmp(elt->qname,  q, strlen(q)))
1059         {
1060             strcpy(q, elt->qname);
1061             NDRX_LOG(log_debug, "Non shm mode, found Q: [%s]", q);
1062             found = EXTRUE;
1063             break;
1064         }
1065     }
1066     
1067     if (!found)
1068     {
1069         NDRX_LOG(log_error, "No servers for [%s] according to Q list", q);
1070         EXFAIL_OUT(ret);
1071     }
1072     
1073     /* save the server in cache... */
1074     add_cached_svc(svcq, q);
1075     
1076 out:
1077     if (NULL!=qlist)
1078     {
1079         ndrx_string_list_free(qlist);
1080     }
1081     return ret;
1082 }
1083 
1084 /**
1085  * Process ping response message
1086  * @param reply reply buffer
1087  * @param len buffer len received
1088  * @return EXSUCCEED/EXFAIL
1089  */
1090 exprivate int ndrx_ndrxd_ping_rsp(command_reply_t *reply, size_t reply_len)
1091 {
1092     int ret=EXSUCCEED;
1093     command_reply_srvping_t *ping_reply = (command_reply_srvping_t *)reply;
1094     
1095     if (reply_len < sizeof(command_reply_t))
1096     {
1097         userlog("NDRXD PING FAIL: Expected reply size: %d got %d!", 
1098                 sizeof(command_reply_t), reply_len);
1099         NDRX_LOG(log_error, "NDRXD PING FAIL: Expected reply size: %d got %d!", 
1100                 sizeof(command_reply_t), reply_len);
1101         
1102         EXFAIL_OUT(ret);
1103     }
1104     else if (NDRXD_COM_DPING_RP!=reply->command)
1105     {
1106         userlog("NDRXD PING WARNING: Expected reply command %d got %d -> wait next",
1107                 NDRXD_COM_DPING_RP, reply->command);
1108         NDRX_LOG(log_error, "NDRXD PING WARNING: Expected reply command %d "
1109                 "got %d -> wait next",
1110                 NDRXD_COM_DPING_RP, reply->command);
1111         
1112         reply->flags|=NDRXD_CALL_FLAGS_RSPHAVE_MORE;
1113         goto out;
1114     }
1115     else if (reply_len != sizeof(command_reply_srvping_t))
1116     {
1117         /* Invalid size of reply command buffer  */
1118         userlog("NDRXD PING FAIL: Expected reply size: %d got %d!", 
1119                 sizeof(command_reply_srvping_t), reply_len);
1120         NDRX_LOG(log_error, "NDRXD PING FAIL: Expected reply size: %d got %d!", 
1121                 sizeof(command_reply_srvping_t), reply_len);
1122         EXFAIL_OUT(ret);
1123     }
1124     else if (ping_reply->seq!=G_atmi_tls->ndrxd_ping_seq)
1125     {
1126         userlog("ndrxd ping reply out of sequence, expected: %d, got %d -> wait next",
1127                 G_atmi_tls->ndrxd_ping_seq, ping_reply->seq);
1128         NDRX_LOG(log_error, "ndrxd ping reply out of sequence, expected: %d, "
1129                 "got %d -> wait next",
1130                 G_atmi_tls->ndrxd_ping_seq, ping_reply->seq);
1131         reply->flags|=NDRXD_CALL_FLAGS_RSPHAVE_MORE;
1132         goto out;
1133     }
1134     else
1135     {
1136         NDRX_LOG(log_debug, "Ping reply with seq=%d ok", ping_reply->seq);
1137     }
1138     
1139 out:
1140     return ret;
1141 }
1142 
1143 /**
1144  * Perform ndrxd ping.
1145  * Also this should ignore out of the sequence messages.
1146  * Timeout is controlled by standard NDRXD_TOUT or by tptoutset(3)
1147  * @param[out] p_seq ping sequence number
1148  * @param[out] p_time_msec ptr to ping milliseconds when succeed
1149  * @param[in] listen_q queue on which we wait for response
1150  * @param[in] listen_q_str queue string for putting in message for reply
1151  * @return EXSUCCEED/EXFAIL (ping failed, timeout, no equeue, etc...)
1152  */
1153 expublic int ndrx_ndrxd_ping(int *p_seq, long *p_time_msec,
1154                     mqd_t listen_q, char * listen_q_str)
1155 {
1156     int ret=EXSUCCEED;
1157     command_srvping_t req;
1158     size_t  send_size=sizeof(req);
1159     ndrx_stopwatch_t tim;
1160     
1161     /* perform TLS entry */
1162     ATMI_TLS_ENTRY;
1163     
1164     memset(&req, 0, sizeof(req));
1165     
1166     G_atmi_tls->ndrxd_ping_seq++;
1167     
1168     if (NDRX_NDRXD_PING_SEQ_MAX < G_atmi_tls->ndrxd_ping_seq)
1169     {
1170         G_atmi_tls->ndrxd_ping_seq=1;
1171     }
1172     
1173     *p_seq = G_atmi_tls->ndrxd_ping_seq;
1174     req.seq = G_atmi_tls->ndrxd_ping_seq;
1175     
1176     ndrx_stopwatch_reset(&tim);
1177     
1178     /* we need the listen_q blocked */
1179     if (EXSUCCEED!=ndrx_q_setblock(listen_q, EXTRUE))
1180     {
1181         NDRX_LOG(log_error, "Failed to set [%s] Q to blocked", listen_q_str);
1182         EXFAIL_OUT(ret);
1183     }
1184     
1185     ret=cmd_generic_bufcall(NDRXD_COM_DPING_RQ, NDRXD_SRC_ADMIN,
1186                         NDRXD_CALL_TYPE_GENERIC,
1187                         (command_call_t *)&req, send_size,
1188                         listen_q_str,
1189                         listen_q,
1190                         (mqd_t)EXFAIL,   /* do not keep open ndrxd q open */
1191                         ndrx_get_G_atmi_conf()->ndrxd_q_str,
1192                         0, NULL,
1193                         NULL,
1194                         ndrx_ndrxd_ping_rsp,
1195                         NULL,
1196                         EXTRUE,
1197                         EXFALSE,
1198                         NULL,
1199                         NULL,
1200                         0,
1201                         NULL);
1202     
1203    *p_time_msec = ndrx_stopwatch_get_delta(&tim);
1204    
1205 out:
1206       
1207    return ret; 
1208 }
1209 
1210 /* vim: set ts=4 sw=4 et smartindent: */