0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034 #include <stdio.h>
0035 #include <stdlib.h>
0036 #include <string.h>
0037 #include <errno.h>
0038 #include <regex.h>
0039 #include <utlist.h>
0040 #include <unistd.h>
0041 #include <signal.h>
0042 #include <sys/wait.h>
0043
0044 #include <ndebug.h>
0045 #include <atmi.h>
0046 #include <atmi_int.h>
0047 #include <typed_buf.h>
0048 #include <ndrstandard.h>
0049 #include <ubf.h>
0050 #include <Exfields.h>
0051
0052 #include "tprecover.h"
0053
0054
0055
0056
0057
0058
/*
 * Module state. Defaults below are overridden from the command line in
 * tpsvrinit() (-c, -t, -m).
 */

/** Number of ndrxd respawns started by this server; returned by TPRECOVER */
static long M_restarts = 0;

/** Period of the ndrxd health check timer, seconds (-c) */
static long M_check = 5;

/** Ping timeout, seconds; applied with tptoutset() (-t) */
static int M_ping_tout = 20;

/** Bad ping threshold; exceeding it triggers kill / respawn of ndrxd (-m) */
static int M_ping_max = 3;

/** Running count of failed ndrxd checks/pings, compared with M_ping_max */
static int M_bad_pings = 0;

/* Defined further below; needed by poll_timer() */
int start_daemon_recover(void);
0075
0076
0077
0078
/**
 * Collect the exit status of every child that has already terminated.
 * Never blocks: waitpid() is polled with WNOHANG for "any child" until it
 * stops returning a positive pid (no more finished children, or no children
 * at all).
 */
void handle_sigchld(void)
{
    pid_t reaped;

    do
    {
        reaped = waitpid((pid_t)(-1), 0, WNOHANG);
    } while (reaped > 0);
}
0083
0084
0085
0086
0087
0088
0089
0090 void TPRECOVER (TPSVCINFO *p_svc)
0091 {
0092 int ret=EXSUCCEED;
0093 UBFH *p_ub = (UBFH *)p_svc->data;
0094
0095 NDRX_LOG(log_debug, "TPRECOVER got call");
0096 Bfprint(p_ub, stderr);
0097
0098 Bchg(p_ub, EXDM_RESTARTS, 1, (char *)&M_restarts, 0L);
0099
0100
0101 out:
0102 tpreturn( ret==EXSUCCEED?TPSUCCESS:TPFAIL,
0103 0,
0104 (char *)p_ub,
0105 0L,
0106 0L);
0107 }
0108
0109
0110
0111
0112
0113
0114
0115 expublic int poll_timer(void)
0116 {
0117 int ret=EXSUCCEED;
0118 int seq;
0119 long tim;
0120 int ndrxd_stat;
0121
0122
0123 handle_sigchld();
0124
0125 ndrxd_stat = ndrx_chk_ndrxd();
0126
0127 if (M_bad_pings > M_ping_max && !ndrxd_stat)
0128 {
0129 NDRX_LOG(log_always, "WARNING ! bad_pings=%d ping_max=%d and "
0130 "ndrxd not running: respawn",
0131 M_bad_pings, M_ping_max);
0132
0133 if (EXSUCCEED!=start_daemon_recover())
0134 {
0135 EXFAIL_OUT(ret);
0136 }
0137
0138 M_bad_pings = 0;
0139 }
0140 else if (!ndrxd_stat)
0141 {
0142 M_bad_pings++;
0143 NDRX_LOG(log_always, "ndrxd not present (or resources issue for process listing...)"
0144 "increase bad_pings=%d ping_max=%d",
0145 M_bad_pings, M_ping_max);
0146 }
0147 else
0148 {
0149
0150
0151 if (EXSUCCEED!=ndrx_ndrxd_ping(&seq, &tim, ndrx_get_G_atmi_conf()->reply_q,
0152 ndrx_get_G_atmi_conf()->reply_q_str))
0153 {
0154 M_bad_pings++;
0155
0156 NDRX_LOG(log_info, "ndrxd_ping_seq=%d bad_pings=%d: timeout or system error",
0157 seq, M_bad_pings);
0158 }
0159 else
0160 {
0161 NDRX_LOG(log_error, "ndrxd_ping_seq=%d time=%ld ms", seq, tim);
0162 }
0163
0164 if (M_bad_pings > M_ping_max)
0165 {
0166
0167 pid_t ndrxd_pid = ndrx_ndrxd_pid_get();
0168 NDRX_LOG(log_always, "WARNING ! bad_pings=%d ping_max=%d -> kill %d %d",
0169 M_bad_pings, M_ping_max, SIGKILL, (int)ndrxd_pid);
0170 if (EXSUCCEED!=kill(ndrxd_pid, SIGKILL))
0171 {
0172 NDRX_LOG(log_error, "Failed to kill %d: %s",
0173 (int)ndrxd_pid, strerror(errno));
0174 }
0175 }
0176 }
0177
0178 out:
0179 return ret;
0180 }
0181
0182
0183
0184
0185 int start_daemon_recover(void)
0186 {
0187 int ret=EXSUCCEED;
0188 pid_t pid;
0189 char key[NDRX_MAX_KEY_SIZE+3+1];
0190
0191 char *ndrxd_logfile = getenv(CONF_NDRX_DMNLOG);
0192
0193 pid = ndrx_fork();
0194
0195 if( pid == 0)
0196 {
0197 FILE *f;
0198 char *cmd[] = { "ndrxd", key, "-r", (char *)0 };
0199
0200
0201 snprintf(key, sizeof(key), NDRX_KEY_FMT, G_atmi_env.rnd_key);
0202
0203
0204 if (NULL==(f=NDRX_FOPEN(ndrxd_logfile, "a")))
0205 {
0206 fprintf(stderr, "Failed to open ndrxd log file: %s\n",
0207 ndrxd_logfile);
0208 }
0209 else
0210 {
0211
0212 close(1);
0213 close(2);
0214 if (EXFAIL==dup(fileno(f)))
0215 {
0216 userlog("%s: Failed to dup(1): %s", __func__, strerror(errno));
0217 }
0218
0219 if (EXFAIL==dup(fileno(f)))
0220 {
0221 userlog("%s: Failed to dup(2): %s", __func__, strerror(errno));
0222 }
0223 }
0224
0225 if (EXSUCCEED != execvp ("ndrxd", cmd))
0226 {
0227 fprintf(stderr, "Failed to start server - ndrxd!\n");
0228 exit(1);
0229 }
0230 }
0231 else
0232 {
0233 M_restarts++;
0234 NDRX_LOG(log_error, "Started ndrxd PID %d", pid);
0235 }
0236 out:
0237 return ret;
0238 }
0239
0240
0241
0242
0243 int NDRX_INTEGRA(tpsvrinit)(int argc, char **argv)
0244 {
0245 int ret=EXSUCCEED;
0246 int c;
0247 extern char *optarg;
0248 sigset_t blockMask;
0249
0250 NDRX_LOG(log_debug, "tpsvrinit called");
0251
0252 while((c = getopt(argc, argv, "c:t:m:")) != -1)
0253 {
0254 NDRX_LOG(log_debug, "%c = [%s]", c, optarg);
0255 switch(c)
0256 {
0257 case 'c':
0258 M_check = atoi(optarg);
0259 NDRX_LOG(log_debug, "check (-c): %d",
0260 M_check);
0261 break;
0262 case 't':
0263 M_ping_tout = atoi(optarg);
0264 break;
0265 case 'm':
0266 M_ping_max = atoi(optarg);
0267 break;
0268 default:
0269 NDRX_LOG(log_error, "Unknown param %c - 0x%x", c, c);
0270 EXFAIL_OUT(ret);
0271 break;
0272 }
0273 }
0274
0275 sigemptyset(&blockMask);
0276 sigaddset(&blockMask, SIGCHLD);
0277
0278 if (sigprocmask(SIG_BLOCK, &blockMask, NULL) == -1)
0279 {
0280 NDRX_LOG(log_always, "%s: sigprocmask failed: %s",
0281 __func__, strerror(errno));
0282 }
0283
0284
0285 NDRX_LOG(log_warn, "Config: ndrxd check time: %d sec", M_check);
0286 NDRX_LOG(log_warn, "Config: ndrxd ping timeout: %d sec", M_ping_tout);
0287 NDRX_LOG(log_warn, "Config: max pings for kill ndrxd: %d", M_ping_max);
0288
0289 if (EXSUCCEED!=tpext_addperiodcb((int)M_check, poll_timer))
0290 {
0291 NDRX_LOG(log_error, "tpext_addperiodcb failed: %s",
0292 tpstrerror(tperrno));
0293 EXFAIL_OUT(ret);
0294 }
0295
0296 if (EXSUCCEED!=tpadvertise(NDRX_SYS_SVC_PFX TPRECOVERSVC, TPRECOVER))
0297 {
0298 NDRX_LOG(log_error, "Failed to initialize TPRECOVER!");
0299 EXFAIL_OUT(ret);
0300 }
0301
0302 if (EXSUCCEED!=tptoutset(M_ping_tout))
0303 {
0304 NDRX_LOG(log_error, "Failed to initialize TPRECOVER!");
0305 EXFAIL_OUT(ret);
0306 }
0307
0308 out:
0309 return ret;
0310 }
0311
0312 void NDRX_INTEGRA(tpsvrdone)(void)
0313 {
0314
0315 }
0316