Back to home page

Enduro/X

 
 

    


0001 /**
0002  * @brief block token string, i.e. keep quoted strings together. strip quotes
0003  *
0004  * @file strtokblk.c
0005  */
0006 /* -----------------------------------------------------------------------------
0007  * Enduro/X Middleware Platform for Distributed Transaction Processing
0008  * Copyright (C) 2009-2016, ATR Baltic, Ltd. All Rights Reserved.
0009  * Copyright (C) 2017-2023, Mavimax, Ltd. All Rights Reserved.
0010  * This software is released under one of the following licenses:
0011  * AGPL (with Java and Go exceptions) or Mavimax's license for commercial use.
0012  * See LICENSE file for full text.
0013  * -----------------------------------------------------------------------------
0014  * AGPL license:
0015  *
0016  * This program is free software; you can redistribute it and/or modify it under
0017  * the terms of the GNU Affero General Public License, version 3 as published
0018  * by the Free Software Foundation;
0019  *
0020  * This program is distributed in the hope that it will be useful, but WITHOUT ANY
0021  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
0022  * PARTICULAR PURPOSE. See the GNU Affero General Public License, version 3
0023  * for more details.
0024  *
0025  * You should have received a copy of the GNU Affero General Public License along 
0026  * with this program; if not, write to the Free Software Foundation, Inc.,
0027  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0028  *
0029  * -----------------------------------------------------------------------------
0030  * A commercial use license is available from Mavimax, Ltd
0031  * contact@mavimax.com
0032  * -----------------------------------------------------------------------------
0033  */
0034 #include <ndrx_config.h>
0035 #include <ndrstandard.h>
0036 #include <string.h>
0037 #include <stdio.h>
0038 #include <nstdutil.h>
0039 /*---------------------------Externs------------------------------------*/
0040 /*---------------------------Macros-------------------------------------*/
0041 /*---------------------------Enums--------------------------------------*/
0042 /*---------------------------Typedefs-----------------------------------*/
0043 /*---------------------------Globals------------------------------------*/
0044 /*---------------------------Statics------------------------------------*/
0045 /*---------------------------Prototypes---------------------------------*/
0046 
/**
 * Unescape the string in place.
 * Rules:
 *  - "\\" becomes "\"
 *  - "\<symb>" becomes "<symb>" for any <symb> listed in \p symbs
 *  - "\<other>" is copied unchanged (backslash is kept)
 *  - a lone backslash at the very end of the string is kept as is
 * The result is never longer than the source, thus the rewrite is done
 * directly in the \p input buffer.
 * @param input input string to process (modified in place). NULL is ignored.
 * @param symbs list of symbols that may be escaped with backslash.
 *  NULL is ignored (string is left untouched).
 */
expublic void ndrx_str_unescape(char *input, char *symbs)
{
    char *p;
    char *output_p;
    int has_escape=0; /* 1 if previous char was a not yet consumed backslash */

    if (NULL==input || NULL==symbs)
    {
        return;
    }

    p=input;
    output_p=input;

    while ('\0'!=*p)
    {
        if ('\\'==*p)
        {
            if (has_escape)
            {
                /* escaped backslash, emit single one */
                *output_p='\\';
                output_p++;
                has_escape=0;
            }
            else
            {
                /* decide on the next char what to do with it */
                has_escape=1;
            }
        }
        else
        {
            if (has_escape && NULL==strchr(symbs, *p))
            {
                /* not an escapable symbol, backslash is plain data */
                *output_p='\\';
                output_p++;
            }

            *output_p=*p;
            output_p++;
            has_escape=0;
        }

        p++;
    }

    /* Do not loose trailing backslash. The writer is at least one byte
     * behind the reader here, so this stays within the original buffer.
     */
    if (has_escape)
    {
        *output_p='\\';
        output_p++;
    }

    /* terminate the string... */
    *output_p='\0';
}
0110 
/**
 * Remove the first (left-most) occurrence of the symbol from the string.
 * The remainder of the string, including the terminating EOS, is shifted
 * one position to the left. If symbol is not found (or \p input is NULL),
 * the string is left unchanged.
 * @param input input string to process (modified in place)
 * @param symb symbol to remove from left
 */
expublic void ndrx_str_trim_left_single(char *input, char symb)
{
    char *p;

    if (NULL==input)
    {
        return;
    }

    p = strchr(input, symb);

    /* strchr() gives NULL if there is no match, and pointer to EOS if
     * symb is 0x0 - in both cases there is nothing to remove
     */
    if (NULL==p || '\0'==*p)
    {
        return;
    }

    /* strlen(p) bytes follows the removed char: the tail plus EOS */
    memmove(p, p+1, strlen(p));
}
0122 
0123 /**
0124  * Tokenize string, keep blocks (e.g. quotes to gether)
0125  * This returns only valid data, and not empty strings.
0126  * @param input input string to process
0127  * @param delimit token delimiter
0128  * @param qotesymbs list of quote symbols 
0129  * @return returns token or NULL
0130  */
0131 expublic char *ndrx_strtokblk ( char *input, char *delimit, char *qotesymbs)
0132 {
0133     /* do stuff per thread, not supported with going up to golang or java 
0134      * with steps
0135      */
0136     static __thread char *p = NULL;
0137     char *token = NULL;
0138     char *block_sym = NULL;
0139     int in_block = 0;
0140     int block_sym_index = -1;
0141     int consecutive_escapes=0;
0142      
0143     /* do not return empty strings... */
0144     do
0145     {
0146         if ( input != NULL)
0147         {
0148             p = input;
0149             token = input;
0150         }
0151         else
0152         {
0153             token = p;
0154             if ( *p == '\0')
0155             {
0156                 token = NULL;
0157             }
0158         }
0159 
0160         /* escape: \ */
0161         while ( *p != '\0')
0162         {   
0163             /* printf("Symb: [%c] ESCAPES: %d INBLOCK: %d\n", *p, consecutive_escapes, in_block); */
0164             if ('\\'==*p)
0165             {
0166                 consecutive_escapes++;
0167             }
0168             else if (in_block)
0169             {
0170                 int do_inc = EXTRUE;
0171                 
0172                 /*no close if previous is \ of token */
0173                 if (qotesymbs[block_sym_index] == *p)
0174                 {
0175 
0176                     /* terminate only if not escaped.. */
0177                     if (consecutive_escapes%2==0)
0178                     {   
0179                         /* p++; - remove symbol on the fly... */
0180                         ndrx_str_trim_left_single(p, qotesymbs[block_sym_index]);
0181                         in_block = 0;
0182                         do_inc = EXFALSE;
0183                     }
0184                 }
0185                 consecutive_escapes=0;
0186                 
0187                 if (do_inc)
0188                 {
0189                     p++;
0190                 }
0191                 
0192                 continue;
0193             }
0194 
0195             /*no open if previous is \, then replace escaped quotes to single*/
0196             else if (( block_sym = strchr ( qotesymbs, *p)) != NULL)
0197             {
0198                 if (consecutive_escapes%2==0)
0199                 {
0200                     in_block = 1;
0201                     block_sym_index = block_sym - qotesymbs;
0202                     
0203                     /* p++; - remove symbol on the fly... */
0204                     ndrx_str_trim_left_single(p, qotesymbs[block_sym_index]);
0205                     
0206                     continue;
0207                 }
0208                 /* escape is spent... */
0209                 consecutive_escapes=0;
0210             }
0211 
0212             if ( strchr ( delimit, *p) != NULL)
0213             {
0214                 *p = '\0';
0215                 p++;
0216                 break;
0217             }
0218 
0219             p++;
0220         }
0221 
0222         if (block_sym_index>-1)
0223         {
0224             char escp_symb[2]={'\0', '\0'};
0225 
0226             escp_symb[0]=qotesymbs[block_sym_index];
0227 
0228             ndrx_str_unescape(token, escp_symb);
0229         }
0230         else if (NULL!=token)
0231         {
0232             /* just unescape any stuff ... */
0233             ndrx_str_unescape(token, qotesymbs);
0234         }
0235         
0236         input = NULL;
0237         
0238     } while (NULL!=token && EXEOS==token[0] && EXFAIL==block_sym_index);
0239    
0240     return token;
0241 }
0242 
0243 /* vim: set ts=4 sw=4 et smartindent: */