0001
0002 #include <pscript.h>
0003 #include <string.h>
0004 #include <ctype.h>
0005 #include <setjmp.h>
0006 #include <psstdstring.h>
0007
0008 #ifdef _DEBUG
0009 #include <stdio.h>
0010
/* Debug-only table of printable opcode names.
 * The debug dump in psstd_rex_compile indexes it with (node type - MAX_CHAR),
 * so entry 0 is a placeholder and entries 1..14 follow the order of the
 * OP_* definitions (OP_GREEDY == MAX_CHAR+1, ... OP_MB == MAX_CHAR+14). */
0011 static const PSChar *g_nnames[] =
0012 {
0013 _SC("NONE"),_SC("OP_GREEDY"), _SC("OP_OR"),
0014 _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"), _SC("OP_CLASS"),
0015 _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),
0016 _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB"),_SC("OP_MB")
0017 };
0018
0019 #endif
0020
/* Node opcodes. They are numbered above MAX_CHAR so that a node whose type is
 * <= MAX_CHAR can be treated as a literal character (see the default case of
 * psstd_rex_matchnode, which compares *str with node->type). */
0021 #define OP_GREEDY (MAX_CHAR+1) /* quantifier: left = repeated node, right = (min<<16)|max */
0022 #define OP_OR (MAX_CHAR+2) /* alternation: left / right = the two alternatives */
0023 #define OP_EXPR (MAX_CHAR+3) /* capturing group: left = first child, right = capture index */
0024 #define OP_NOCAPEXPR (MAX_CHAR+4) /* non-capturing group "(?:...)": left = first child */
0025 #define OP_DOT (MAX_CHAR+5) /* '.' : any single character */
0026 #define OP_CLASS (MAX_CHAR+6) /* "[...]" : left = first member of the class chain */
0027 #define OP_CCLASS (MAX_CHAR+7) /* escaped class such as \d or \w : left = class letter */
0028 #define OP_NCLASS (MAX_CHAR+8) /* "[^...]" : negated class, same layout as OP_CLASS */
0029 #define OP_RANGE (MAX_CHAR+9) /* "a-z" inside a class: left = low bound, right = high bound */
0030 #define OP_CHAR (MAX_CHAR+10) /* not referenced elsewhere in the visible code */
0031 #define OP_EOL (MAX_CHAR+11) /* '$' : matches only at exp->_eol */
0032 #define OP_BOL (MAX_CHAR+12) /* '^' : matches only at exp->_bol */
0033 #define OP_WB (MAX_CHAR+13) /* \b or \B : left holds the letter ('b' or 'B') */
0034 #define OP_MB (MAX_CHAR+14) /* \mXY : balanced pair, left = opening char, right = closing char */
0035
/* Metacharacters recognised by the pattern parser. */
0036 #define PSREX_SYMBOL_ANY_CHAR ('.')
0037 #define PSREX_SYMBOL_GREEDY_ONE_OR_MORE ('+')
0038 #define PSREX_SYMBOL_GREEDY_ZERO_OR_MORE ('*')
0039 #define PSREX_SYMBOL_GREEDY_ZERO_OR_ONE ('?')
0040 #define PSREX_SYMBOL_BRANCH ('|')
0041 #define PSREX_SYMBOL_END_OF_STRING ('$')
/* Also used as the negation marker right after '[' (see psstd_rex_class). */
0042 #define PSREX_SYMBOL_BEGINNING_OF_STRING ('^')
0043 #define PSREX_SYMBOL_ESCAPE_CHAR ('\\')
0044
0045
/* Node type: either one of the OP_* opcodes or, for literal nodes, the
 * character value itself. */
0046 typedef int PSRexNodeType;
0047
/* One node of the compiled pattern. Nodes live in PSRex::_nodes and refer to
 * each other by index; psstd_rex_newnode initialises all three links to -1,
 * which means "none". */
0048 typedef struct tagPSRexNode{
0049 PSRexNodeType type;
0050 PSInteger left; /* opcode-specific operand (child index, class id, range low, ...) */
0051 PSInteger right; /* opcode-specific operand (capture index, packed counts, range high, ...) */
0052 PSInteger next; /* index of the following node in the same sequence, or -1 */
0053 }PSRexNode;
0054
/* Compiled regular expression plus the scratch state used while compiling
 * and while matching. */
0055 struct PSRex{
0056 const PSChar *_eol; /* end of the text being matched (set by match/searchrange) */
0057 const PSChar *_bol; /* start of the text being matched (set by match/searchrange) */
0058 const PSChar *_p; /* parser cursor into the pattern during compilation */
0059 PSInteger _first; /* index of the root OP_EXPR node */
0060 PSInteger _op; /* not used by the visible code */
0061 PSRexNode *_nodes; /* node array, grown by psstd_rex_newnode */
0062 PSInteger _nallocated; /* capacity of _nodes, in nodes */
0063 PSInteger _nsize; /* number of nodes currently in use */
0064 PSInteger _nsubexpr; /* number of OP_EXPR nodes created (root included) */
0065 PSRexMatch *_matches; /* _nsubexpr capture slots, allocated at the end of compile */
0066 PSInteger _currsubexp; /* capture index the matcher expects to fill next */
0067 void *_jmpbuf; /* heap-allocated jmp_buf used by psstd_rex_error */
0068 const PSChar **_error; /* optional out-pointer that receives the error message */
0069 };
0070
0071 static PSInteger psstd_rex_list(PSRex *exp);
0072
0073 static PSInteger psstd_rex_newnode(PSRex *exp, PSRexNodeType type)
0074 {
0075 PSRexNode n;
0076 n.type = type;
0077 n.next = n.right = n.left = -1;
0078 if(type == OP_EXPR)
0079 n.right = exp->_nsubexpr++;
0080 if(exp->_nallocated < (exp->_nsize + 1)) {
0081 PSInteger oldsize = exp->_nallocated;
0082 exp->_nallocated *= 2;
0083 exp->_nodes = (PSRexNode *)ps_realloc(exp->_nodes, oldsize * sizeof(PSRexNode) ,exp->_nallocated * sizeof(PSRexNode));
0084 }
0085 exp->_nodes[exp->_nsize++] = n;
0086 PSInteger newid = exp->_nsize - 1;
0087 return (PSInteger)newid;
0088 }
0089
0090 static void psstd_rex_error(PSRex *exp,const PSChar *error)
0091 {
0092 if(exp->_error) *exp->_error = error;
0093 longjmp(*((jmp_buf*)exp->_jmpbuf),-1);
0094 }
0095
0096 static void psstd_rex_expect(PSRex *exp, PSInteger n){
0097 if((*exp->_p) != n)
0098 psstd_rex_error(exp, _SC("expected paren"));
0099 exp->_p++;
0100 }
0101
0102 static PSChar psstd_rex_escapechar(PSRex *exp)
0103 {
0104 if(*exp->_p == PSREX_SYMBOL_ESCAPE_CHAR){
0105 exp->_p++;
0106 switch(*exp->_p) {
0107 case 'v': exp->_p++; return '\v';
0108 case 'n': exp->_p++; return '\n';
0109 case 't': exp->_p++; return '\t';
0110 case 'r': exp->_p++; return '\r';
0111 case 'f': exp->_p++; return '\f';
0112 default: return (*exp->_p++);
0113 }
0114 } else if(!scisprint(*exp->_p)) psstd_rex_error(exp,_SC("letter expected"));
0115 return (*exp->_p++);
0116 }
0117
0118 static PSInteger psstd_rex_charclass(PSRex *exp,PSInteger classid)
0119 {
0120 PSInteger n = psstd_rex_newnode(exp,OP_CCLASS);
0121 exp->_nodes[n].left = classid;
0122 return n;
0123 }
0124
0125 static PSInteger psstd_rex_charnode(PSRex *exp,PSBool isclass)
0126 {
0127 PSChar t;
0128 if(*exp->_p == PSREX_SYMBOL_ESCAPE_CHAR) {
0129 exp->_p++;
0130 switch(*exp->_p) {
0131 case 'n': exp->_p++; return psstd_rex_newnode(exp,'\n');
0132 case 't': exp->_p++; return psstd_rex_newnode(exp,'\t');
0133 case 'r': exp->_p++; return psstd_rex_newnode(exp,'\r');
0134 case 'f': exp->_p++; return psstd_rex_newnode(exp,'\f');
0135 case 'v': exp->_p++; return psstd_rex_newnode(exp,'\v');
0136 case 'a': case 'A': case 'w': case 'W': case 's': case 'S':
0137 case 'd': case 'D': case 'x': case 'X': case 'c': case 'C':
0138 case 'p': case 'P': case 'l': case 'u':
0139 {
0140 t = *exp->_p; exp->_p++;
0141 return psstd_rex_charclass(exp,t);
0142 }
0143 case 'm':
0144 {
0145 PSChar cb, ce;
0146 cb = *++exp->_p;
0147 ce = *++exp->_p;
0148 exp->_p++;
0149 if ((!cb) || (!ce)) psstd_rex_error(exp,_SC("balanced chars expected"));
0150 if ( cb == ce ) psstd_rex_error(exp,_SC("open/close char can't be the same"));
0151 PSInteger node = psstd_rex_newnode(exp,OP_MB);
0152 exp->_nodes[node].left = cb;
0153 exp->_nodes[node].right = ce;
0154 return node;
0155 }
0156 case 'b':
0157 case 'B':
0158 if(!isclass) {
0159 PSInteger node = psstd_rex_newnode(exp,OP_WB);
0160 exp->_nodes[node].left = *exp->_p;
0161 exp->_p++;
0162 return node;
0163 }
0164 default:
0165 t = *exp->_p; exp->_p++;
0166 return psstd_rex_newnode(exp,t);
0167 }
0168 }
0169 else if(!scisprint(*exp->_p)) {
0170
0171 psstd_rex_error(exp,_SC("letter expected"));
0172 }
0173 t = *exp->_p; exp->_p++;
0174 return psstd_rex_newnode(exp,t);
0175 }
0176 static PSInteger psstd_rex_class(PSRex *exp)
0177 {
0178 PSInteger ret = -1;
0179 PSInteger first = -1,chain;
0180 if(*exp->_p == PSREX_SYMBOL_BEGINNING_OF_STRING){
0181 ret = psstd_rex_newnode(exp,OP_NCLASS);
0182 exp->_p++;
0183 }else ret = psstd_rex_newnode(exp,OP_CLASS);
0184
0185 if(*exp->_p == ']') psstd_rex_error(exp,_SC("empty class"));
0186 chain = ret;
0187 while(*exp->_p != ']' && exp->_p != exp->_eol) {
0188 if(*exp->_p == '-' && first != -1){
0189 PSInteger r;
0190 if(*exp->_p++ == ']') psstd_rex_error(exp,_SC("unfinished range"));
0191 r = psstd_rex_newnode(exp,OP_RANGE);
0192 if(exp->_nodes[first].type>*exp->_p) psstd_rex_error(exp,_SC("invalid range"));
0193 if(exp->_nodes[first].type == OP_CCLASS) psstd_rex_error(exp,_SC("cannot use character classes in ranges"));
0194 exp->_nodes[r].left = exp->_nodes[first].type;
0195 PSInteger t = psstd_rex_escapechar(exp);
0196 exp->_nodes[r].right = t;
0197 exp->_nodes[chain].next = r;
0198 chain = r;
0199 first = -1;
0200 }
0201 else{
0202 if(first!=-1){
0203 PSInteger c = first;
0204 exp->_nodes[chain].next = c;
0205 chain = c;
0206 first = psstd_rex_charnode(exp,PSTrue);
0207 }
0208 else{
0209 first = psstd_rex_charnode(exp,PSTrue);
0210 }
0211 }
0212 }
0213 if(first!=-1){
0214 PSInteger c = first;
0215 exp->_nodes[chain].next = c;
0216 }
0217
0218 exp->_nodes[ret].left = exp->_nodes[ret].next;
0219 exp->_nodes[ret].next = -1;
0220 return ret;
0221 }
0222
0223 static PSInteger psstd_rex_parsenumber(PSRex *exp)
0224 {
0225 PSInteger ret = *exp->_p-'0';
0226 PSInteger positions = 10;
0227 exp->_p++;
0228 while(isdigit(*exp->_p)) {
0229 ret = ret*10+(*exp->_p++-'0');
0230 if(positions==1000000000) psstd_rex_error(exp,_SC("overflow in numeric constant"));
0231 positions *= 10;
0232 };
0233 return ret;
0234 }
0235
0236 static PSInteger psstd_rex_element(PSRex *exp)
0237 {
0238 PSInteger ret = -1;
0239 switch(*exp->_p)
0240 {
0241 case '(': {
0242 PSInteger expr;
0243 exp->_p++;
0244
0245
0246 if(*exp->_p =='?') {
0247 exp->_p++;
0248 psstd_rex_expect(exp,':');
0249 expr = psstd_rex_newnode(exp,OP_NOCAPEXPR);
0250 }
0251 else
0252 expr = psstd_rex_newnode(exp,OP_EXPR);
0253 PSInteger newn = psstd_rex_list(exp);
0254 exp->_nodes[expr].left = newn;
0255 ret = expr;
0256 psstd_rex_expect(exp,')');
0257 }
0258 break;
0259 case '[':
0260 exp->_p++;
0261 ret = psstd_rex_class(exp);
0262 psstd_rex_expect(exp,']');
0263 break;
0264 case PSREX_SYMBOL_END_OF_STRING: exp->_p++; ret = psstd_rex_newnode(exp,OP_EOL);break;
0265 case PSREX_SYMBOL_ANY_CHAR: exp->_p++; ret = psstd_rex_newnode(exp,OP_DOT);break;
0266 default:
0267 ret = psstd_rex_charnode(exp,PSFalse);
0268 break;
0269 }
0270
0271
0272 PSBool isgreedy = PSFalse;
0273 unsigned short p0 = 0, p1 = 0;
0274 switch(*exp->_p){
0275 case PSREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; isgreedy = PSTrue; break;
0276 case PSREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; isgreedy = PSTrue; break;
0277 case PSREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; isgreedy = PSTrue; break;
0278 case '{':
0279 exp->_p++;
0280 if(!isdigit(*exp->_p)) psstd_rex_error(exp,_SC("number expected"));
0281 p0 = (unsigned short)psstd_rex_parsenumber(exp);
0282
0283 switch(*exp->_p) {
0284 case '}':
0285 p1 = p0; exp->_p++;
0286 break;
0287 case ',':
0288 exp->_p++;
0289 p1 = 0xFFFF;
0290 if(isdigit(*exp->_p)){
0291 p1 = (unsigned short)psstd_rex_parsenumber(exp);
0292 }
0293 psstd_rex_expect(exp,'}');
0294 break;
0295 default:
0296 psstd_rex_error(exp,_SC(", or } expected"));
0297 }
0298
0299 isgreedy = PSTrue;
0300 break;
0301
0302 }
0303 if(isgreedy) {
0304 PSInteger nnode = psstd_rex_newnode(exp,OP_GREEDY);
0305 exp->_nodes[nnode].left = ret;
0306 exp->_nodes[nnode].right = ((p0)<<16)|p1;
0307 ret = nnode;
0308 }
0309
0310 if((*exp->_p != PSREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != PSREX_SYMBOL_GREEDY_ZERO_OR_MORE) && (*exp->_p != PSREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) {
0311 PSInteger nnode = psstd_rex_element(exp);
0312 exp->_nodes[ret].next = nnode;
0313 }
0314
0315 return ret;
0316 }
0317
0318 static PSInteger psstd_rex_list(PSRex *exp)
0319 {
0320 PSInteger ret=-1,e;
0321 if(*exp->_p == PSREX_SYMBOL_BEGINNING_OF_STRING) {
0322 exp->_p++;
0323 ret = psstd_rex_newnode(exp,OP_BOL);
0324 }
0325 e = psstd_rex_element(exp);
0326 if(ret != -1) {
0327 exp->_nodes[ret].next = e;
0328 }
0329 else ret = e;
0330
0331 if(*exp->_p == PSREX_SYMBOL_BRANCH) {
0332 PSInteger temp,tright;
0333 exp->_p++;
0334 temp = psstd_rex_newnode(exp,OP_OR);
0335 exp->_nodes[temp].left = ret;
0336 tright = psstd_rex_list(exp);
0337 exp->_nodes[temp].right = tright;
0338 ret = temp;
0339 }
0340 return ret;
0341 }
0342
0343 static PSBool psstd_rex_matchcclass(PSInteger cclass,PSChar c)
0344 {
0345 switch(cclass) {
0346 case 'a': return isalpha(c)?PSTrue:PSFalse;
0347 case 'A': return !isalpha(c)?PSTrue:PSFalse;
0348 case 'w': return (isalnum(c) || c == '_')?PSTrue:PSFalse;
0349 case 'W': return (!isalnum(c) && c != '_')?PSTrue:PSFalse;
0350 case 's': return isspace(c)?PSTrue:PSFalse;
0351 case 'S': return !isspace(c)?PSTrue:PSFalse;
0352 case 'd': return isdigit(c)?PSTrue:PSFalse;
0353 case 'D': return !isdigit(c)?PSTrue:PSFalse;
0354 case 'x': return isxdigit(c)?PSTrue:PSFalse;
0355 case 'X': return !isxdigit(c)?PSTrue:PSFalse;
0356 case 'c': return iscntrl(c)?PSTrue:PSFalse;
0357 case 'C': return !iscntrl(c)?PSTrue:PSFalse;
0358 case 'p': return ispunct(c)?PSTrue:PSFalse;
0359 case 'P': return !ispunct(c)?PSTrue:PSFalse;
0360 case 'l': return islower(c)?PSTrue:PSFalse;
0361 case 'u': return isupper(c)?PSTrue:PSFalse;
0362 }
0363 return PSFalse;
0364 }
0365
0366 static PSBool psstd_rex_matchclass(PSRex* exp,PSRexNode *node,PSChar c)
0367 {
0368 do {
0369 switch(node->type) {
0370 case OP_RANGE:
0371 if(c >= node->left && c <= node->right) return PSTrue;
0372 break;
0373 case OP_CCLASS:
0374 if(psstd_rex_matchcclass(node->left,c)) return PSTrue;
0375 break;
0376 default:
0377 if(c == node->type)return PSTrue;
0378 }
0379 } while((node->next != -1) && (node = &exp->_nodes[node->next]));
0380 return PSFalse;
0381 }
0382
/*
 * Tries to match a single node at position str.
 *
 * exp  - compiled expression; provides the node array, the text bounds
 *        (_bol/_eol) and the capture state (_matches/_currsubexp).
 * node - the node to match.
 * str  - current position in the text.
 * next - the node that follows `node` in the enclosing sequence, or NULL;
 *        it is only consulted as look-ahead by OP_GREEDY and is passed down
 *        by the group cases.
 *
 * Returns the position just past the matched text (equal to str for
 * zero-width nodes), or NULL when the node does not match.
 */
0383 static const PSChar *psstd_rex_matchnode(PSRex* exp,PSRexNode *node,const PSChar *str,PSRexNode *next)
0384 {
0385
0386 PSRexNodeType type = node->type;
0387 switch(type) {
0388 case OP_GREEDY: {
0389
0390 PSRexNode *greedystop = NULL;
/* right packs the repetition bounds: min in the high 16 bits, max in the low
 * 16 bits (see psstd_rex_element, which uses 0xFFFF for "no upper bound"). */
0391 PSInteger p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;
/* s is the running position; good is the end of the last successful repetition. */
0392 const PSChar *s=str, *good = str;
0393
/* greedystop is the node that must match after the repetition: the node's
 * own successor if it has one, otherwise the successor supplied by the caller. */
0394 if(node->next != -1) {
0395 greedystop = &exp->_nodes[node->next];
0396 }
0397 else {
0398 greedystop = next;
0399 }
0400
/* Repeat while below the maximum. The first clause keeps the loop running
 * once the counter itself reaches 0xFFFF. */
0401 while((nmaches == 0xFFFF || nmaches < p1)) {
0402
0403 const PSChar *stop;
0404 if(!(s = psstd_rex_matchnode(exp,&exp->_nodes[node->left],s,greedystop)))
0405 break;
0406 nmaches++;
0407 good=s;
0408 if(greedystop) {
0409
0410
/* Look-ahead: skipped when the following node is itself a quantifier with a
 * minimum of 0 (it would match trivially). */
0411 if(greedystop->type != OP_GREEDY ||
0412 (greedystop->type == OP_GREEDY && ((greedystop->right >> 16)&0x0000FFFF) != 0))
0413 {
0414 PSRexNode *gnext = NULL;
0415 if(greedystop->next != -1) {
0416 gnext = &exp->_nodes[greedystop->next];
0417 }else if(next && next->next != -1){
0418 gnext = &exp->_nodes[next->next];
0419 }
0420 stop = psstd_rex_matchnode(exp,greedystop,s,gnext);
0421 if(stop) {
0422
/* The following node matches here: stop repeating as soon as the count is
 * within the allowed bounds. */
0423 if(p0 == p1 && p0 == nmaches) break;
0424 else if(nmaches >= p0 && p1 == 0xFFFF) break;
0425 else if(nmaches >= p0 && nmaches <= p1) break;
0426 }
0427 }
0428 }
0429
/* Nothing left to consume. */
0430 if(s >= exp->_eol)
0431 break;
0432 }
/* Succeed only if the number of repetitions satisfies the bounds. */
0433 if(p0 == p1 && p0 == nmaches) return good;
0434 else if(nmaches >= p0 && p1 == 0xFFFF) return good;
0435 else if(nmaches >= p0 && nmaches <= p1) return good;
0436 return NULL;
0437 }
0438 case OP_OR: {
/* Try the whole left-hand sequence from str; if any of its nodes fails,
 * restart from str with the right-hand sequence. */
0439 const PSChar *asd = str;
0440 PSRexNode *temp=&exp->_nodes[node->left];
0441 while( (asd = psstd_rex_matchnode(exp,temp,asd,NULL)) ) {
0442 if(temp->next != -1)
0443 temp = &exp->_nodes[temp->next];
0444 else
0445 return asd;
0446 }
0447 asd = str;
0448 temp = &exp->_nodes[node->right];
0449 while( (asd = psstd_rex_matchnode(exp,temp,asd,NULL)) ) {
0450 if(temp->next != -1)
0451 temp = &exp->_nodes[temp->next];
0452 else
0453 return asd;
0454 }
0455 return NULL;
0456 break;
0457 }
0458 case OP_EXPR:
0459 case OP_NOCAPEXPR:{
0460 PSRexNode *n = &exp->_nodes[node->left];
0461 const PSChar *cur = str;
0462 PSInteger capture = -1;
/* A capturing group records its match only when its capture index (right)
 * is the one currently expected; it then claims that slot. */
0463 if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {
0464 capture = exp->_currsubexp;
0465 exp->_matches[capture].begin = cur;
0466 exp->_currsubexp++;
0467 }
/* Saved after the increment above and restored once the children have
 * matched, which discards any increments made by nested groups. */
0468 PSInteger tempcap = exp->_currsubexp;
/* Match the children in order; each child gets its successor (or the
 * group's own successor for the last child) as look-ahead. */
0469 do {
0470 PSRexNode *subnext = NULL;
0471 if(n->next != -1) {
0472 subnext = &exp->_nodes[n->next];
0473 }else {
0474 subnext = next;
0475 }
0476 if(!(cur = psstd_rex_matchnode(exp,n,cur,subnext))) {
/* Failure: clear the capture slot claimed above. _currsubexp is not
 * restored on this path. */
0477 if(capture != -1){
0478 exp->_matches[capture].begin = 0;
0479 exp->_matches[capture].len = 0;
0480 }
0481 return NULL;
0482 }
0483 } while((n->next != -1) && (n = &exp->_nodes[n->next]));
0484
0485 exp->_currsubexp = tempcap;
0486 if(capture != -1)
0487 exp->_matches[capture].len = cur - exp->_matches[capture].begin;
0488 return cur;
0489 }
0490 case OP_WB:
/* Zero-width boundary test based only on whitespace: a boundary is the start
 * of the text before a non-space, the end of the text after a non-space, or
 * a space/non-space transition between *str and *(str+1). \b (left == 'b')
 * succeeds at a boundary, \B succeeds everywhere else.
 * NOTE(review): *(str+1) appears to be read even when str == exp->_eol (when
 * the preceding character is whitespace) - confirm this cannot run past the
 * buffer. */
0491 if((str == exp->_bol && !isspace(*str))
0492 || (str == exp->_eol && !isspace(*(str-1)))
0493 || (!isspace(*str) && isspace(*(str+1)))
0494 || (isspace(*str) && !isspace(*(str+1))) ) {
0495 return (node->left == 'b')?str:NULL;
0496 }
0497 return (node->left == 'b')?NULL:str;
0498 case OP_BOL:
0499 if(str == exp->_bol) return str;
0500 return NULL;
0501 case OP_EOL:
0502 if(str == exp->_eol) return str;
0503 return NULL;
0504 case OP_DOT:{
/* Any single character, as long as one is left. */
0505 if (str == exp->_eol) return NULL;
0506 str++;
0507 }
0508 return str;
0509 case OP_NCLASS:
0510 case OP_CLASS:
0511 if (str == exp->_eol) return NULL;
/* OP_CLASS succeeds when a member matches, OP_NCLASS when none does. */
0512 if(psstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?PSTrue:PSFalse):(type == OP_NCLASS?PSTrue:PSFalse)) {
0513 str++;
0514 return str;
0515 }
0516 return NULL;
0517 case OP_CCLASS:
0518 if (str == exp->_eol) return NULL;
0519 if(psstd_rex_matchcclass(node->left,*str)) {
0520 str++;
0521 return str;
0522 }
0523 return NULL;
0524 case OP_MB:
0525 {
/* Balanced pair: the text must start with the opening character (*str is
 * read here without an _eol check); then scan forward, tracking the nesting
 * depth, until the closing character that brings the depth back to zero. */
0526 PSInteger cb = node->left;
0527 if(*str != cb) return NULL;
0528 PSInteger ce = node->right;
0529 PSInteger cont = 1;
0530 const PSChar *streol = exp->_eol;
0531 while (++str < streol) {
0532 if (*str == ce) {
0533 if (--cont == 0) {
0534 return ++str;
0535 }
0536 }
0537 else if (*str == cb) cont++;
0538 }
0539 }
/* End of text reached before the pair was balanced. */
0540 return NULL;
0541 default:
/* Literal character: the node type is the character value itself. */
0542 if (str == exp->_eol) return NULL;
0543 if(*str != node->type) return NULL;
0544 str++;
0545 return str;
0546 }
0547 return NULL;
0548 }
0549
0550
0551 PSRex *psstd_rex_compile(const PSChar *pattern,const PSChar **error)
0552 {
0553 PSRex * volatile exp = (PSRex *)ps_malloc(sizeof(PSRex));
0554 exp->_eol = exp->_bol = NULL;
0555 exp->_p = pattern;
0556 exp->_nallocated = (PSInteger)scstrlen(pattern) * sizeof(PSChar);
0557 exp->_nodes = (PSRexNode *)ps_malloc(exp->_nallocated * sizeof(PSRexNode));
0558 exp->_nsize = 0;
0559 exp->_matches = 0;
0560 exp->_nsubexpr = 0;
0561 exp->_first = psstd_rex_newnode(exp,OP_EXPR);
0562 exp->_error = error;
0563 exp->_jmpbuf = ps_malloc(sizeof(jmp_buf));
0564 if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {
0565 PSInteger res = psstd_rex_list(exp);
0566 exp->_nodes[exp->_first].left = res;
0567 if(*exp->_p!='\0')
0568 psstd_rex_error(exp,_SC("unexpected character"));
0569 #ifdef _DEBUG
0570 {
0571 PSInteger nsize,i;
0572 PSRexNode *t;
0573 nsize = exp->_nsize;
0574 t = &exp->_nodes[0];
0575 scprintf(_SC("\n"));
0576 for(i = 0;i < nsize; i++) {
0577 if(exp->_nodes[i].type>MAX_CHAR)
0578 scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);
0579 else
0580 scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);
0581 scprintf(_SC("left %02d right %02d next %02d\n"), (PSInt32)exp->_nodes[i].left, (PSInt32)exp->_nodes[i].right, (PSInt32)exp->_nodes[i].next);
0582 }
0583 scprintf(_SC("\n"));
0584 }
0585 #endif
0586 exp->_matches = (PSRexMatch *) ps_malloc(exp->_nsubexpr * sizeof(PSRexMatch));
0587 memset(exp->_matches,0,exp->_nsubexpr * sizeof(PSRexMatch));
0588 }
0589 else{
0590 psstd_rex_free(exp);
0591 return NULL;
0592 }
0593 return exp;
0594 }
0595
0596 void psstd_rex_free(PSRex *exp)
0597 {
0598 if(exp) {
0599 if(exp->_nodes) ps_free(exp->_nodes,exp->_nallocated * sizeof(PSRexNode));
0600 if(exp->_jmpbuf) ps_free(exp->_jmpbuf,sizeof(jmp_buf));
0601 if(exp->_matches) ps_free(exp->_matches,exp->_nsubexpr * sizeof(PSRexMatch));
0602 ps_free(exp,sizeof(PSRex));
0603 }
0604 }
0605
0606 PSBool psstd_rex_match(PSRex* exp,const PSChar* text)
0607 {
0608 const PSChar* res = NULL;
0609 exp->_bol = text;
0610 exp->_eol = text + scstrlen(text);
0611 exp->_currsubexp = 0;
0612 res = psstd_rex_matchnode(exp,exp->_nodes,text,NULL);
0613 if(res == NULL || res != exp->_eol)
0614 return PSFalse;
0615 return PSTrue;
0616 }
0617
0618 PSBool psstd_rex_searchrange(PSRex* exp,const PSChar* text_begin,const PSChar* text_end,const PSChar** out_begin, const PSChar** out_end)
0619 {
0620 const PSChar *cur = NULL;
0621 PSInteger node = exp->_first;
0622 if(text_begin >= text_end) return PSFalse;
0623 exp->_bol = text_begin;
0624 exp->_eol = text_end;
0625 do {
0626 cur = text_begin;
0627 while(node != -1) {
0628 exp->_currsubexp = 0;
0629 cur = psstd_rex_matchnode(exp,&exp->_nodes[node],cur,NULL);
0630 if(!cur)
0631 break;
0632 node = exp->_nodes[node].next;
0633 }
0634 text_begin++;
0635 } while(cur == NULL && text_begin != text_end);
0636
0637 if(cur == NULL)
0638 return PSFalse;
0639
0640 --text_begin;
0641
0642 if(out_begin) *out_begin = text_begin;
0643 if(out_end) *out_end = cur;
0644 return PSTrue;
0645 }
0646
0647 PSBool psstd_rex_search(PSRex* exp,const PSChar* text, const PSChar** out_begin, const PSChar** out_end)
0648 {
0649 return psstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end);
0650 }
0651
0652 PSInteger psstd_rex_getsubexpcount(PSRex* exp)
0653 {
0654 return exp->_nsubexpr;
0655 }
0656
0657 PSBool psstd_rex_getsubexp(PSRex* exp, PSInteger n, PSRexMatch *subexp)
0658 {
0659 if( n<0 || n >= exp->_nsubexpr) return PSFalse;
0660 *subexp = exp->_matches[n];
0661 return PSTrue;
0662 }
0663