1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
61
62
63
64
69
70
74
81
82
83
84
85
86
87
88
89
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
142
145
146
152
153
154
155
156
159
162
166
167
171
175
179
183
187
195
196
197
200
201
202
205
206
207
208
209
210
211
220
221
222
223
226
238
239
242
243
244
245
246
247
248
249
250
251
252
253
254
257
258
259
260
264
265
269
270
271
272
273
274
277
280
281
287
299
300
303
312
313
314
315
329
330
331
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
370
373
374
380
381
382
383
384
387
390
394
416
417
420
421
422
425
426
427
428
429
430
431
432
433
434
437
448
449
450
451
452
453
454
460
461
462
463
464
465
466
473
476
477
478
479
483
484
485
491
492
493
494
495
496
499
516
520
524
525
526
527
528
529
530
533
534
535
538
539
540
546
547
551
554
555
556
557
561
562
563
564
574
578
579
580
581
582
/* ... */
#include <stdlib.h>
#include <string.h>
#include "jimautoconf.h"
#if defined(JIM_REGEXP)
#include "jimregexp.h"
#else
#include <regex.h>
#define jim_regcomp regcomp
#define jim_regexec regexec
#define jim_regerror regerror
#define jim_regfree regfree
/* ... */#endif
#include "jim.h"
#include "utf8.h"
/*
 * Free hook for the "regexp" object type.
 *
 * Releases the compiled regex stored in objPtr->internalRep.ptrIntValue.ptr
 * with jim_regfree() and then frees the regex_t storage itself.
 * The interp argument is not used.
 */
static void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr)
{
    jim_regfree(objPtr->internalRep.ptrIntValue.ptr);
    Jim_Free(objPtr->internalRep.ptrIntValue.ptr);
}
/* ... */
/*
 * Object type used to cache a compiled regular expression inside a Jim_Obj.
 *
 * Only the free hook is installed. The two NULL slots are presumably the
 * duplicate and update-string hooks -- confirm against the Jim_ObjType
 * declaration in jim.h.
 */
static const Jim_ObjType regexpObjType = {
    "regexp",
    FreeRegexpInternalRep,
    NULL,
    NULL,
    JIM_TYPE_NONE
};
/*
 * Returns the compiled form of the pattern held in objPtr, compiling it
 * and caching the result in the object's internal representation if needed.
 *
 * interp  - receives the error message if compilation fails
 * objPtr  - object whose string value is the pattern
 * flags   - extra regcomp flags; REG_EXTENDED is always added
 *
 * Returns the compiled regex (owned by objPtr), or NULL on a compile error,
 * in which case the interp result describes the failure and objPtr is left
 * untouched.
 */
static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned flags)
{
    regex_t *compre;
    const char *pattern;
    int ret;

    /* Reuse the cached compilation only if it was built with the same flags */
    if (objPtr->typePtr == &regexpObjType &&
        objPtr->internalRep.ptrIntValue.ptr && objPtr->internalRep.ptrIntValue.int1 == flags) {
        return objPtr->internalRep.ptrIntValue.ptr;
    }

    /* Fetch the string before the old internal representation is discarded */
    pattern = Jim_String(objPtr);
    compre = Jim_Alloc(sizeof(regex_t));

    if ((ret = jim_regcomp(compre, pattern, REG_EXTENDED | flags)) != 0) {
        char buf[100];

        jim_regerror(ret, compre, buf, sizeof(buf));
        Jim_SetResultFormatted(interp, "couldn't compile regular expression pattern: %s", buf);
        jim_regfree(compre);
        Jim_Free(compre);
        return NULL;
    }

    /* Compilation succeeded: replace whatever representation was there */
    Jim_FreeIntRep(interp, objPtr);

    objPtr->typePtr = &regexpObjType;
    objPtr->internalRep.ptrIntValue.int1 = flags;
    objPtr->internalRep.ptrIntValue.ptr = compre;

    return compre;
}
/*
 * Implements the "regexp" command:
 *
 *   regexp ?-switch ...? exp string ?matchVar? ?subMatchVar ...?
 *
 * Switches (abbreviations accepted):
 *   -indices  store/return {first last} character index pairs, not text
 *   -nocase   compile with REG_ICASE
 *   -line     compile with REG_NEWLINE
 *   -all      keep matching through the string
 *   -inline   return the match data as a list; match variables not allowed
 *   -start n  begin matching at character index n
 *   --        end of switches
 *
 * On success the interp result is the list of matches with -inline,
 * otherwise the number of matches found. With -all and without -inline
 * the matches are only counted; match variables are not assigned.
 *
 * Returns JIM_OK, or JIM_ERR with a message in the interp result.
 */
int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
{
    int opt_indices = 0;
    int opt_all = 0;
    int opt_inline = 0;
    regex_t *regex;
    int match, i, j;
    int offset = 0;
    regmatch_t *pmatch = NULL;
    int source_len;
    int result = JIM_OK;
    const char *pattern;
    const char *source_str;
    int num_matches = 0;
    int num_vars;
    Jim_Obj *resultListObj = NULL;
    int regcomp_flags = 0;
    int eflags = 0;
    int option;
    enum {
        OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END
    };
    static const char * const options[] = {
        "-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL
    };

    if (argc < 3) {
      wrongNumArgs:
        Jim_WrongNumArgs(interp, 1, argv,
            "?-switch ...? exp string ?matchVar? ?subMatchVar ...?");
        return JIM_ERR;
    }

    /* Consume leading switches; stop at the first word not starting with '-' */
    for (i = 1; i < argc; i++) {
        const char *opt = Jim_String(argv[i]);

        if (*opt != '-') {
            break;
        }
        if (Jim_GetEnum(interp, argv[i], options, &option, "switch", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
            return JIM_ERR;
        }
        if (option == OPT_END) {
            i++;
            break;
        }
        switch (option) {
            case OPT_INDICES:
                opt_indices = 1;
                break;

            case OPT_NOCASE:
                regcomp_flags |= REG_ICASE;
                break;

            case OPT_LINE:
                regcomp_flags |= REG_NEWLINE;
                break;

            case OPT_ALL:
                opt_all = 1;
                break;

            case OPT_INLINE:
                opt_inline = 1;
                break;

            case OPT_START:
                if (++i == argc) {
                    goto wrongNumArgs;
                }
                if (Jim_GetIndex(interp, argv[i], &offset) != JIM_OK) {
                    return JIM_ERR;
                }
                break;
        }
    }
    /* At least the pattern and the string must remain */
    if (argc - i < 2) {
        goto wrongNumArgs;
    }

    regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
    if (!regex) {
        return JIM_ERR;
    }

    pattern = Jim_String(argv[i]);
    source_str = Jim_GetString(argv[i + 1], &source_len);

    num_vars = argc - i - 2;
    if (opt_inline) {
        if (num_vars) {
            Jim_SetResultString(interp, "regexp match variables not allowed when using -inline",
                -1);
            result = JIM_ERR;
            goto done;
        }
        /* With -inline, report the whole match plus every subexpression */
        num_vars = regex->re_nsub + 1;
    }

    pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch));

    /*
     * Apply -start. The index counts characters, so it is validated against
     * the character length of the string rather than its byte length;
     * otherwise utf8_index() could be asked to walk past the end of a string
     * containing multi-byte characters.
     */
    if (offset) {
        int source_chars = utf8_strlen(source_str, source_len);

        if (offset < 0) {
            /* Negative values are treated as relative to the end of the string */
            offset += source_chars + 1;
        }
        /* Clamp so that -indices results stay relative to the real string */
        if (offset < 0) {
            offset = 0;
        }
        else if (offset > source_chars) {
            offset = source_chars;
        }
        if (offset > 0) {
            source_str += utf8_index(source_str, offset);
            /* No longer at the beginning of the string */
            eflags |= REG_NOTBOL;
        }
    }

    if (opt_inline) {
        resultListObj = Jim_NewListObj(interp, NULL, 0);
    }

  next_match:
    match = jim_regexec(regex, source_str, num_vars + 1, pmatch, eflags);
    if (match >= REG_BADPAT) {
        char buf[100];

        jim_regerror(match, regex, buf, sizeof(buf));
        Jim_SetResultFormatted(interp, "error while matching pattern: %s", buf);
        result = JIM_ERR;
        goto done;
    }

    if (match == REG_NOMATCH) {
        goto done;
    }

    num_matches++;

    if (opt_all && !opt_inline) {
        /* Only counting matches, so skip extracting the match data */
        goto try_next_match;
    }

    /*
     * Extract the match data. In both modes exactly num_vars entries are
     * produced: one per match variable, or one per (sub)expression with
     * -inline. The match variables start two words after the pattern.
     */
    for (j = 0; j < num_vars; j++) {
        Jim_Obj *resultObj;

        if (opt_indices) {
            resultObj = Jim_NewListObj(interp, NULL, 0);
        }
        else {
            resultObj = Jim_NewStringObj(interp, "", 0);
        }

        if (pmatch[j].rm_so == -1) {
            /* This subexpression did not take part in the match */
            if (opt_indices) {
                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
            }
        }
        else {
            if (opt_indices) {
                /* Convert byte offsets to character indices in the full string */
                int so = utf8_strlen(source_str, pmatch[j].rm_so);
                int eo = utf8_strlen(source_str, pmatch[j].rm_eo);

                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + so));
                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + eo - 1));
            }
            else {
                Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, pmatch[j].rm_eo - pmatch[j].rm_so);
            }
        }

        if (opt_inline) {
            Jim_ListAppendElement(interp, resultListObj, resultObj);
        }
        else {
            result = Jim_SetVariable(interp, argv[i + 2 + j], resultObj);
            if (result != JIM_OK) {
                Jim_FreeObj(interp, resultObj);
                goto done;
            }
        }
    }

  try_next_match:
    /* A pattern anchored with ^ can only match again in -line mode */
    if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
        if (pmatch[0].rm_eo) {
            offset += utf8_strlen(source_str, pmatch[0].rm_eo);
            source_str += pmatch[0].rm_eo;
        }
        else {
            /*
             * Match ended at the current position: step forward so the loop
             * makes progress.
             * NOTE(review): this advances one byte, not one UTF-8 character.
             */
            source_str++;
            offset++;
        }
        if (*source_str) {
            eflags = REG_NOTBOL;
            goto next_match;
        }
    }

  done:
    if (result == JIM_OK) {
        if (opt_inline) {
            Jim_SetResult(interp, resultListObj);
        }
        else {
            Jim_SetResultInt(interp, num_matches);
        }
    }
    else if (resultListObj) {
        /* The list was never handed to the interp, so release it here */
        Jim_FreeObj(interp, resultListObj);
    }

    Jim_Free(pmatch);
    return result;
}
/* Number of match slots passed to jim_regexec() by regsub */
#define MAX_SUB_MATCHES 50

/*
 * Implements the "regsub" command:
 *
 *   regsub ?-switch ...? exp string subSpec ?varName?
 *
 * Switches (abbreviations accepted):
 *   -nocase   compile with REG_ICASE
 *   -line     compile with REG_NEWLINE
 *   -all      replace every match, not just the first
 *   -start n  begin matching at character index n
 *   --        end of switches
 *
 * In subSpec, "&" and "\0" insert the whole match, "\1".."\9" insert the
 * corresponding subexpression, "\\" and "\&" insert a literal backslash or
 * ampersand, and any other backslash sequence is copied unchanged.
 *
 * If varName is given, the new string is stored there and the interp
 * result is the number of substitutions made; otherwise the interp result
 * is the new string.
 *
 * Returns JIM_OK, or JIM_ERR with a message in the interp result.
 */
int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
{
    int regcomp_flags = 0;
    int regexec_flags = 0;
    int opt_all = 0;
    int offset = 0;
    regex_t *regex;
    const char *p;
    int result;
    regmatch_t pmatch[MAX_SUB_MATCHES + 1];
    int num_matches = 0;

    int i, j, n;
    Jim_Obj *varname;
    Jim_Obj *resultObj;
    const char *source_str;
    int source_len;
    const char *replace_str;
    int replace_len;
    const char *pattern;
    int option;
    enum {
        OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_END
    };
    static const char * const options[] = {
        "-nocase", "-line", "-all", "-start", "--", NULL
    };

    if (argc < 4) {
      wrongNumArgs:
        Jim_WrongNumArgs(interp, 1, argv,
            "?-switch ...? exp string subSpec ?varName?");
        return JIM_ERR;
    }

    /* Consume leading switches; stop at the first word not starting with '-' */
    for (i = 1; i < argc; i++) {
        const char *opt = Jim_String(argv[i]);

        if (*opt != '-') {
            break;
        }
        if (Jim_GetEnum(interp, argv[i], options, &option, "switch", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
            return JIM_ERR;
        }
        if (option == OPT_END) {
            i++;
            break;
        }
        switch (option) {
            case OPT_NOCASE:
                regcomp_flags |= REG_ICASE;
                break;

            case OPT_LINE:
                regcomp_flags |= REG_NEWLINE;
                break;

            case OPT_ALL:
                opt_all = 1;
                break;

            case OPT_START:
                if (++i == argc) {
                    goto wrongNumArgs;
                }
                if (Jim_GetIndex(interp, argv[i], &offset) != JIM_OK) {
                    return JIM_ERR;
                }
                break;
        }
    }
    /* Exactly "exp string subSpec" or "exp string subSpec varName" must remain */
    if (argc - i != 3 && argc - i != 4) {
        goto wrongNumArgs;
    }

    regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
    if (!regex) {
        return JIM_ERR;
    }
    pattern = Jim_String(argv[i]);

    source_str = Jim_GetString(argv[i + 1], &source_len);
    replace_str = Jim_GetString(argv[i + 2], &replace_len);
    /* Only touch argv[i + 3] when it exists; it is past the end otherwise */
    varname = (argc - i == 4) ? argv[i + 3] : NULL;

    /* Build the result here */
    resultObj = Jim_NewStringObj(interp, "", 0);

    /*
     * Apply -start. The index counts characters, so it is clamped against
     * the character length of the string rather than its byte length;
     * otherwise utf8_index() below could walk past the end of a string
     * containing multi-byte characters.
     */
    if (offset) {
        int source_chars = utf8_strlen(source_str, source_len);

        if (offset < 0) {
            /* Negative values are treated as relative to the end of the string */
            offset += source_chars + 1;
        }
        if (offset > source_chars) {
            offset = source_chars;
        }
        else if (offset < 0) {
            offset = 0;
        }
    }
    /* From here on, offset is a byte offset */
    offset = utf8_index(source_str, offset);

    /* Copy the part before -start unchanged */
    Jim_AppendString(interp, resultObj, source_str, offset);

    /*
     * Match and substitute. p is the current scan position and n is the
     * number of bytes remaining from p to the end of the string.
     */
    n = source_len - offset;
    p = source_str + offset;
    do {
        int match = jim_regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);

        if (match >= REG_BADPAT) {
            char buf[100];

            jim_regerror(match, regex, buf, sizeof(buf));
            Jim_SetResultFormatted(interp, "error while matching pattern: %s", buf);
            /* The partial result was never handed to the interp */
            Jim_FreeObj(interp, resultObj);
            return JIM_ERR;
        }
        if (match == REG_NOMATCH) {
            break;
        }

        num_matches++;

        /* Copy the unmatched text that precedes this match */
        Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so);

        /* Expand subSpec for this match */
        for (j = 0; j < replace_len; j++) {
            int idx;
            int c = replace_str[j];

            if (c == '&') {
                idx = 0;
            }
            else if (c == '\\') {
                /*
                 * For a trailing backslash this reads replace_str[replace_len],
                 * presumably the NUL terminator, which falls through to the
                 * final branch below and copies the lone backslash.
                 */
                c = replace_str[++j];
                if ((c >= '0') && (c <= '9')) {
                    idx = c - '0';
                }
                else if ((c == '\\') || (c == '&')) {
                    Jim_AppendString(interp, resultObj, replace_str + j, 1);
                    continue;
                }
                else {
                    /* Unknown escape: keep the backslash and the following char */
                    Jim_AppendString(interp, resultObj, replace_str + j - 1, (j == replace_len) ? 1 : 2);
                    continue;
                }
            }
            else {
                Jim_AppendString(interp, resultObj, replace_str + j, 1);
                continue;
            }
            if ((idx < MAX_SUB_MATCHES) && pmatch[idx].rm_so != -1 && pmatch[idx].rm_eo != -1) {
                Jim_AppendString(interp, resultObj, p + pmatch[idx].rm_so,
                    pmatch[idx].rm_eo - pmatch[idx].rm_so);
            }
        }

        /* Continue scanning after the match */
        p += pmatch[0].rm_eo;
        n -= pmatch[0].rm_eo;

        /* Stop after the first match unless -all, or when nothing is left */
        if (!opt_all || n == 0) {
            break;
        }

        /* A pattern anchored with ^ can only match again in -line mode */
        if ((regcomp_flags & REG_NEWLINE) == 0 && pattern[0] == '^') {
            break;
        }

        /* An empty pattern matches everywhere: copy one byte and move on */
        if (pattern[0] == '\0' && n) {
            Jim_AppendString(interp, resultObj, p, 1);
            p++;
            n--;
        }

        /*
         * NOTE(review): for a non-empty pattern that matched the empty
         * string, p has not advanced here; whether the loop then makes
         * progress depends on how jim_regexec treats REG_NOTBOL -- confirm.
         */
        if (pmatch[0].rm_eo == pmatch[0].rm_so) {
            regexec_flags = REG_NOTBOL;
        }
        else {
            regexec_flags = 0;
        }

    } while (n);

    /* Copy whatever follows the last match */
    Jim_AppendString(interp, resultObj, p, -1);

    if (varname) {
        result = Jim_SetVariable(interp, varname, resultObj);

        if (result == JIM_OK) {
            Jim_SetResultInt(interp, num_matches);
        }
        else {
            Jim_FreeObj(interp, resultObj);
        }
    }
    else {
        Jim_SetResult(interp, resultObj);
        result = JIM_OK;
    }

    return result;
}
/*
 * Package initialisation: registers the "regexp" and "regsub" commands
 * with the interpreter.
 *
 * NOTE(review): Jim_PackageProvideCheck is presumably a macro that can
 * return early on failure -- confirm against jim.h.
 *
 * Returns JIM_OK once both commands have been created.
 */
int Jim_regexpInit(Jim_Interp *interp)
{
    Jim_PackageProvideCheck(interp, "regexp");
    Jim_CreateCommand(interp, "regexp", Jim_RegexpCmd, NULL, NULL);
    Jim_CreateCommand(interp, "regsub", Jim_RegsubCmd, NULL, NULL);
    return JIM_OK;
}