casa
$Rev:20696$
|
00001 /* 00002 cregex.h: Extended regular expression matching and search library 00003 Copyright (C) 1993,1994,1995,1997,1999,2001 00004 Associated Universities, Inc. Washington DC, USA. 00005 00006 This library is free software; you can redistribute it and/or modify it 00007 under the terms of the GNU Library General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or (at your 00009 option) any later version. 00010 00011 This library is distributed in the hope that it will be useful, but WITHOUT 00012 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00013 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00014 License for more details. 00015 00016 You should have received a copy of the GNU Library General Public License 00017 along with this library; if not, write to the Free Software Foundation, 00018 Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00019 00020 Correspondence concerning AIPS++ should be addressed as follows: 00021 Internet email: aips2-request@nrao.edu. 00022 Postal address: AIPS++ Project Office 00023 National Radio Astronomy Observatory 00024 520 Edgemont Road 00025 Charlottesville, VA 22903-2475 USA 00026 00027 $Id: cregex.h 20901 2010-06-09 07:23:37Z gervandiepen $ 00028 */ 00029 00030 #ifndef CASA_CREGEX_H 00031 #define CASA_CREGEX_H 00032 00033 /* Definitions for data structures callers pass the regex library. */ 00034 00035 #ifdef __cplusplus 00036 extern "C" { 00037 #endif 00038 00039 /* Define number of parens for which we record the beginnings and ends. 00040 This affects how much space the `struct re_registers' type takes up. */ 00041 #ifndef RE_NREGS 00042 #define RE_NREGS 32 00043 #endif 00044 00045 #define BYTEWIDTH 8 00046 00047 00048 #include <casa/aips.h> 00049 00050 namespace casa { //# NAMESPACE CASA - BEGIN 00051 00052 /* Maximum number of duplicates an interval can allow. */ 00053 /* Has been changed to copy from the limits.h file 00054 #if defined(_AIX) || defined(__sgi) 00055 # undef RE_DUP_MAX 00056 #endif 00057 #define RE_DUP_MAX ((1 << 15) - 1) 00058 */ 00059 00060 /* This defines the various regexp syntaxes. 00061 It can be set using the function a2_re_set_syntax. */ 00063 00064 00065 /* The following bits are used in the obscure_syntax variable to choose among 00066 alternative regexp syntaxes. */ 00067 00068 /* If this bit is set, plain parentheses serve as grouping, and backslash 00069 parentheses are needed for literal searching. 00070 If not set, backslash-parentheses are grouping, and plain parentheses 00071 are for literal searching. */ 00072 #define RE_NO_BK_PARENS 1 00073 00074 /* If this bit is set, plain | serves as the `or'-operator, and \| is a 00075 literal. 00076 If not set, \| serves as the `or'-operator, and | is a literal. */ 00077 #define RE_NO_BK_VBAR (1 << 1) 00078 00079 /* If this bit is not set, plain + or ? serves as an operator, and \+, \? are 00080 literals. 00081 If set, \+, \? are operators and plain +, ? are literals. */ 00082 #define RE_BK_PLUS_QM (1 << 2) 00083 00084 /* If this bit is set, | binds tighter than ^ or $. 00085 If not set, the contrary. */ 00086 #define RE_TIGHT_VBAR (1 << 3) 00087 00088 /* If this bit is set, then treat newline as an OR operator. 00089 If not set, treat it as a normal character. */ 00090 #define RE_NEWLINE_OR (1 << 4) 00091 00092 /* If this bit is set, then special characters may act as normal 00093 characters in some contexts. Specifically, this applies to: 00094 ^ -- only special at the beginning, or after ( or |; 00095 $ -- only special at the end, or before ) or |; 00096 *, +, ? -- only special when not after the beginning, (, or |. 00097 If this bit is not set, special characters (such as *, ^, and $) 00098 always have their special meaning regardless of the surrounding 00099 context. */ 00100 #define RE_CONTEXT_INDEP_OPS (1 << 5) 00101 00102 /* If this bit is not set, then \ before anything inside [ and ] is taken as 00103 a real \. 00104 If set, then such a \ escapes the following character. This is a 00105 special case for awk. */ 00106 #define RE_AWK_CLASS_HACK (1 << 6) 00107 00108 /* If this bit is set, then \{ and \} or { and } serve as interval operators. 00109 If not set, then \{ and \} and { and } are treated as literals. */ 00110 #define RE_INTERVALS (1 << 7) 00111 00112 /* If this bit is not set, then \{ and \} serve as interval operators and 00113 { and } are literals. 00114 If set, then { and } serve as interval operators and \{ and \} are 00115 literals. */ 00116 #define RE_NO_BK_CURLY_BRACES (1 << 8) 00117 00118 /* If this bit is set, then character classes are supported; they are: 00119 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], 00120 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. 00121 If not set, then character classes are not supported. */ 00122 #define RE_CHAR_CLASSES (1 << 9) 00123 00124 /* If this bit is set, then the dot re doesn't match a null byte. 00125 If not set, it does. */ 00126 #define RE_DOT_NOT_NULL (1 << 10) 00127 00128 /* If this bit is set, then [^...] doesn't match a newline. 00129 If not set, it does. */ 00130 #define RE_HAT_NOT_NEWLINE (1 << 11) 00131 00132 /* If this bit is not set, back references are recognized. 00133 If set, they aren't. */ 00134 #define RE_NO_BK_REFS (1 << 12) 00135 00136 /* If this bit is set, back references must refer to a preceding 00137 subexpression. If not set, a back reference to a nonexistent 00138 subexpression is treated as literal characters. */ 00139 #define RE_NO_EMPTY_BK_REF (1 << 13) 00140 00141 /* If this bit is set, bracket expressions can't be empty. 00142 If it is set, they can be empty. */ 00143 #define RE_NO_EMPTY_BRACKETS (1 << 14) 00144 00145 /* If this bit is set, then *, +, ? and { cannot be first in an re or 00146 immediately after a |, or a (. Furthermore, a | cannot be first or 00147 last in an re, or immediately follow another | or a (. Also, a ^ 00148 cannot appear in a nonleading position and a $ cannot appear in a 00149 nontrailing position (outside of bracket expressions, that is). */ 00150 #define RE_CONTEXTUAL_INVALID_OPS (1 << 15) 00151 00152 /* If this bit is set, then +, ? and | aren't recognized as operators. 00153 If it's not, they are. */ 00154 #define RE_LIMITED_OPS (1 << 16) 00155 00156 /* If this bit is set, then an ending range point has to collate higher 00157 or equal to the starting range point. 00158 If it's not set, then when the ending range point collates higher 00159 than the starting range point, the range is just considered empty. */ 00160 #define RE_NO_EMPTY_RANGES (1 << 17) 00161 00162 /* If this bit is set, then a hyphen (-) can't be an ending range point. 00163 If it isn't, then it can. */ 00164 #define RE_NO_HYPHEN_RANGE_END (1 << 18) 00165 00166 00167 /* Define combinations of bits for the standard possibilities. */ 00168 #define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 00169 | RE_CONTEXT_INDEP_OPS) 00170 #define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 00171 | RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK) 00172 #define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 00173 | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR) 00174 #define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) 00175 #define RE_SYNTAX_EMACS 0 00176 #define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \ 00177 | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \ 00178 | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \ 00179 | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \ 00180 | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END) 00181 00182 #define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \ 00183 | RE_NO_BK_VBAR | RE_NO_BK_PARENS \ 00184 | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \ 00185 | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \ 00186 | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \ 00187 | RE_NO_HYPHEN_RANGE_END) 00188 00189 00190 /* This data structure is used to represent a compiled pattern. */ 00191 00192 // <summary> 00193 // This data structure is used to represent a compiled pattern. 00194 // </summary> 00195 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos=""> 00196 // </reviewed> 00197 // <synopsis> 00198 // This data structure is used to represent a compiled pattern. 00199 // It is used by the regular expression functions in cregex.cc. 00200 // </synopsis> 00201 00202 struct re_pattern_buffer 00203 { 00204 char *buffer; /* Space holding the compiled pattern commands. */ 00205 long allocated; /* Size of space that `buffer' points to. */ 00206 long used; /* Length of portion of buffer actually occupied */ 00207 char *fastmap; /* Pointer to fastmap, if any, or zero if none. */ 00208 /* a2_re_search uses the fastmap, if there is one, 00209 to skip over totally implausible characters. */ 00210 char *translate; /* Translate table to apply to all characters before 00211 comparing, or zero for no translation. 00212 The translation is applied to a pattern when it is 00213 compiled and to data when it is matched. */ 00214 char fastmap_accurate; 00215 /* Set to zero when a new pattern is stored, 00216 set to one when the fastmap is updated from it. */ 00217 char can_be_null; /* Set to one by compiling fastmap 00218 if this pattern might match the null string. 00219 It does not necessarily match the null string 00220 in that case, but if this is zero, it cannot. 00221 2 as value means can match null string 00222 but at end of range or before a character 00223 listed in the fastmap. */ 00224 }; 00225 00226 00227 /* search.c (search_buffer) needs this one value. It is defined both in 00228 regex.c and here. */ 00229 #define RE_EXACTN_VALUE 1 00230 00231 00232 /* Structure to store register contents data in. 00233 00234 Pass the address of such a structure as an argument to a2_re_match, etc., 00235 if you want this information back. 00236 00237 For i from 1 to RE_NREGS - 1, start[i] records the starting index in 00238 the string of where the ith subexpression matched, and end[i] records 00239 one after the ending index. start[0] and end[0] are analogous, for 00240 the entire pattern. */ 00241 00242 // <summary> 00243 // Data structure to store register contents data in. 00244 // </summary> 00245 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos=""> 00246 // </reviewed> 00247 // <synopsis> 00248 // This data structure is used to store register contents data in. 00249 // It is used by the regular expression functions in cregex.cc. 00250 // </synopsis> 00251 00252 struct re_registers 00253 { 00254 int start[RE_NREGS]; 00255 int end[RE_NREGS]; 00256 }; 00257 00258 00259 00260 #if defined(__STDC__) || defined(__cplusplus) 00261 00262 extern const char *a2_re_compile_pattern (char *, int, struct re_pattern_buffer *); 00263 extern int a2_re_set_syntax (int syntax); 00264 /* Is this really advertised? */ 00265 extern void a2_re_compile_fastmap (struct re_pattern_buffer *); 00266 extern int a2_re_search (struct re_pattern_buffer *, char*, int, int, int, 00267 struct re_registers *); 00268 extern int a2_re_search_2 (struct re_pattern_buffer *, char *, int, 00269 char *, int, int, int, 00270 struct re_registers *, int); 00271 extern int a2_re_match (struct re_pattern_buffer *, char *, int, int, 00272 struct re_registers *); 00273 extern int a2_re_match_2 (struct re_pattern_buffer *, char *, int, 00274 char *, int, int, struct re_registers *, int); 00275 00276 /* 4.2 bsd compatibility. */ 00277 // extern const char *re_comp (char *); 00278 // extern int re_exec (char *); 00279 00280 #else /* !__STDC__ */ 00281 00282 extern const char *a2_re_compile_pattern (); 00283 /* Is this really advertised? */ 00284 extern void a2_re_compile_fastmap (); 00285 extern int a2_re_search (), a2_re_search_2 (); 00286 extern int a2_re_match (), a2_re_match_2 (); 00287 00288 /* 4.2 bsd compatibility. */ 00289 extern const char *re_comp (); 00290 extern int re_exec (); 00291 00292 #endif /* __STDC__ */ 00293 00294 00295 #ifdef SYNTAX_TABLE 00296 00297 #endif 00298 00299 #ifdef __cplusplus 00300 00301 } 00302 #endif 00303 00304 00305 } //# NAMESPACE CASA - END 00306 00307 #endif /* !__REGEXP_LIBRARY */