00001
00013 #include <cerrno>
00014
00015 #include <iostream>
00016 #include <fstream>
00017
00018 #include "string_manip.h"
00019
00020 #include "op_regex.h"
00021
00022 using namespace std;
00023
00024 namespace {
00025
/**
 * Translate a regcomp()/regexec() error code into a readable message.
 *
 * @param err     error code returned by the failing regex call
 * @param regexp  the regex_t that was handed to the failing call
 * @return the text regerror() produces for @p err
 */
string op_regerror(int err, regex_t const & regexp)
{
	// With a null buffer regerror() only reports the number of bytes the
	// message needs, terminating NUL included.
	size_t const needed_size = regerror(err, &regexp, 0, 0);
	if (needed_size == 0)
		return string();

	// Let a string own the scratch storage: the previous new[] buffer was
	// never released, so every reported error leaked memory.
	string buffer(needed_size, '\0');
	regerror(err, &regexp, &buffer[0], needed_size);

	// Cut the result at the NUL written by regerror().
	return string(buffer.c_str());
}
00034
00035
00036 void op_regcomp(regex_t & regexp, string const & pattern)
00037 {
00038 int err = regcomp(®exp, pattern.c_str(), REG_EXTENDED);
00039 if (err) {
00040 throw bad_regex("regcomp error: " + op_regerror(err, regexp)
00041 + " for pattern : " + pattern);
00042 }
00043 }
00044
00045
/**
 * Run a compiled regular expression against a string.
 *
 * @param regex   compiled expression
 * @param str     subject string
 * @param match   array of at least @p nmatch entries filled in on success
 * @param nmatch  number of entries available in @p match
 * @return true only when regexec() reports a successful match
 */
bool op_regexec(regex_t const & regex, string const & str, regmatch_t * match,
		size_t nmatch)
{
	// regexec() returns 0 on a match. Comparing against REG_NOMATCH alone
	// would treat any other failure code (e.g. REG_ESPACE) as a match and
	// let callers read a match array whose content is undefined.
	return regexec(&regex, str.c_str(), nmatch, match, 0) == 0;
}
00051
00052
/**
 * Release the storage held by a compiled regular expression.
 *
 * @param regexp  expression to hand to regfree()
 */
void op_regfree(regex_t & regexp)
{
	regfree(&regexp);
}
00057
00058
00059
00060
00061
/**
 * Map a group selector character to a sub-expression index.
 *
 * '0'..'9' map to 0..9 and 'a'..'z' map to 10..35.
 *
 * @param ch  selector character
 * @return the index, or size_t(-1) when @p ch is not a valid selector
 */
size_t subexpr_index(char ch)
{
	// Plain range tests instead of isdigit(): passing a negative char to
	// isdigit() is undefined behaviour, and this needs no <cctype>.
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'z')
		return ch - 'a' + 10;
	return size_t(-1);
}
00070
00071 }
00072
00073
00074 bad_regex::bad_regex(string const & pattern)
00075 : op_exception(pattern)
00076 {
00077 }
00078
00079
00080 regular_expression_replace::regular_expression_replace(size_t limit_,
00081 size_t limit_defs)
00082 :
00083 limit(limit_),
00084 limit_defs_expansion(limit_defs)
00085 {
00086 }
00087
00088
00089 regular_expression_replace::~regular_expression_replace()
00090 {
00091 for (size_t i = 0 ; i < regex_replace.size() ; ++i)
00092 op_regfree(regex_replace[i].regexp);
00093 }
00094
00095
00096 void regular_expression_replace::add_definition(string const & name,
00097 string const & definition)
00098 {
00099 defs[name] = expand_string(definition);
00100 }
00101
00102
00103 void regular_expression_replace::add_pattern(string const & pattern,
00104 string const & replace)
00105 {
00106 string expanded_pattern = expand_string(pattern);
00107
00108 regex_t regexp;
00109 op_regcomp(regexp, expanded_pattern);
00110 replace_t regex = { regexp, replace };
00111 regex_replace.push_back(regex);
00112 }
00113
00114
00115 string regular_expression_replace::expand_string(string const & input)
00116 {
00117 string last, expanded(input);
00118 size_t i = 0;
00119 for (i = 0 ; i < limit_defs_expansion ; ++i) {
00120 last = expanded;
00121 expanded = substitute_definition(last);
00122 if (expanded == last)
00123 break;
00124 }
00125
00126 if (i == limit_defs_expansion)
00127 throw bad_regex("too many substitution for: + input");
00128
00129 return last;
00130 }
00131
00132
00133 string regular_expression_replace::substitute_definition(string const & pattern)
00134 {
00135 string result;
00136 bool previous_is_escape = false;
00137
00138 for (size_t i = 0 ; i < pattern.length() ; ++i) {
00139 if (pattern[i] == '$' && !previous_is_escape) {
00140 size_t pos = pattern.find('{', i);
00141 if (pos != i + 1) {
00142 throw bad_regex("invalid $ in pattern: " + pattern);
00143 }
00144 size_t end = pattern.find('}', i);
00145 if (end == string::npos) {
00146 throw bad_regex("no matching '}' in pattern: " + pattern);
00147 }
00148 string def_name = pattern.substr(pos+1, (end-pos) - 1);
00149 if (defs.find(def_name) == defs.end()) {
00150 throw bad_regex("definition not found and used in pattern: ("
00151 + def_name + ") " + pattern);
00152 }
00153 result += defs[def_name];
00154 i = end;
00155 } else {
00156 if (pattern[i] == '\\' && !previous_is_escape)
00157 previous_is_escape = true;
00158 else
00159 previous_is_escape = false;
00160 result += pattern[i];
00161 }
00162 }
00163
00164 return result;
00165 }
00166
00167
00168
00169
00170 bool regular_expression_replace::execute(string & str) const
00171 {
00172 bool changed = true;
00173 for (size_t nr_iter = 0; changed && nr_iter < limit ; ++nr_iter) {
00174 changed = false;
00175 for (size_t i = 0 ; i < regex_replace.size() ; ++i) {
00176 if (do_execute(str, regex_replace[i]))
00177 changed = true;
00178 }
00179 }
00180
00181
00182
00183 return changed == false;
00184 }
00185
00186
00187 bool regular_expression_replace::do_execute(string & str,
00188 replace_t const & regexp) const
00189 {
00190 bool changed = false;
00191
00192 regmatch_t match[max_match];
00193 for (size_t iter = 0;
00194 op_regexec(regexp.regexp, str, match, max_match) && iter < limit;
00195 iter++) {
00196 changed = true;
00197 do_replace(str, regexp.replace, match);
00198 }
00199
00200 return changed;
00201 }
00202
00203
00204 regmatch_t const &
00205 regular_expression_replace::get_match(regmatch_t const * match, char idx) const
00206 {
00207 size_t sub_expr = subexpr_index(idx);
00208 if (sub_expr == size_t(-1))
00209 throw bad_regex("expect group index: " + idx);
00210 if (sub_expr >= max_match)
00211 throw bad_regex("illegal group index :" + idx);
00212 return match[sub_expr];
00213 }
00214
00215 void regular_expression_replace::do_replace
00216 (string & str, string const & replace, regmatch_t const * match) const
00217 {
00218 string inserted;
00219 for (size_t i = 0 ; i < replace.length() ; ++i) {
00220 if (replace[i] == '\\') {
00221 if (i == replace.length() - 1) {
00222 throw bad_regex("illegal \\ trailer: " +
00223 replace);
00224 }
00225 ++i;
00226 if (replace[i] == '\\') {
00227 inserted += '\\';
00228 } else {
00229 regmatch_t const & matched = get_match(match,
00230 replace[i]);
00231 if (matched.rm_so == -1 &&
00232 matched.rm_eo == -1) {
00233
00234 } else if (matched.rm_so == -1 ||
00235 matched.rm_eo == -1) {
00236 throw bad_regex("illegal match: " +
00237 replace);
00238 } else {
00239 inserted += str.substr(matched.rm_so,
00240 matched.rm_eo - matched.rm_so);
00241 }
00242 }
00243 } else {
00244 inserted += replace[i];
00245 }
00246 }
00247
00248 size_t first = match[0].rm_so;
00249 size_t count = match[0].rm_eo - match[0].rm_so;
00250
00251 str.replace(first, count, inserted);
00252 }
00253
00254
00255 void setup_regex(regular_expression_replace & regex,
00256 string const & filename)
00257 {
00258 ifstream in(filename.c_str());
00259 if (!in) {
00260 throw op_runtime_error("Can't open file " + filename +
00261 " for reading", errno);
00262 }
00263
00264 regular_expression_replace var_name_rule;
00265 var_name_rule.add_pattern("^\\$([_a-zA-Z][_a-zA-Z0-9]*)[ ]*=.*", "\\1");
00266 regular_expression_replace var_value_rule;
00267 var_value_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");
00268
00269 regular_expression_replace left_rule;
00270 left_rule.add_pattern("[ ]*\"(.*)\"[ ]*=.*", "\\1");
00271 regular_expression_replace right_rule;
00272 right_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");
00273
00274 string line;
00275 while (getline(in, line)) {
00276 line = trim(line);
00277 if (line.empty() || line[0] == '#')
00278 continue;
00279
00280 string temp = line;
00281 var_name_rule.execute(temp);
00282 if (temp == line) {
00283 string left = line;
00284 left_rule.execute(left);
00285 if (left == line) {
00286 throw bad_regex("invalid input file: \"" + line + '"');
00287 }
00288
00289 string right = line;
00290 right_rule.execute(right);
00291 if (right == line) {
00292 throw bad_regex("invalid input file: \"" + line + '"');
00293 }
00294
00295 regex.add_pattern(left, right);
00296 } else {
00297
00298
00299 string var_name = temp;
00300 string var_value = line;
00301 var_value_rule.execute(var_value);
00302 if (var_value == line) {
00303 throw bad_regex("invalid input file: \"" + line + '"');
00304 }
00305
00306 regex.add_definition(var_name, var_value);
00307 }
00308 }
00309 }