From 625060d503a1b7eb081c175122c6a73c1f7b2404 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 22 Jan 2007 13:30:10 +0000 Subject: added and documented regex functions from search_regex.class --- it.class | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) (limited to 'it.class') diff --git a/it.class b/it.class index 149ec04..f865858 100644 --- a/it.class +++ b/it.class @@ -163,6 +163,105 @@ function toascii($text) array('ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue', 'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue', 'æ' => 'ae', 'Æ' => 'Ae', 'ß' => 'ss')); } + +/** + * Convert regex for preg (redefines \w,\W,\b,\B, adds and escapes delimiter, adds modifiers) + * @param $pattern Regex to convert + * @param named parameter casesensitive Regex is case sensitive (omit modifier i) + * @param named parameter multiline add modifier m + * @param named parameter singleline add modifier s + * @param named parameter utf8 add modifier u + * @param named parameter extended add modifier x + * @return converted regex to use with preg + */ +function convertregex( $pattern, $p = array() ) +{ + $wordchar = 'a-zA-Z0-9_\xa0-\xff'; + $nonwordchar = '\x00-\x2f\x3a-\x40\x5B-\x60\x7b-\x9f'; + + #matches only even number of backslashes (double escaped for php and preg) + $nonesc = '(? "\$1$wordchar", + # \W in character class + "/($incharclass$nonesc)\\\\W/" => "\$1$nonwordchar", + # normal \w + "/($nonesc)\\\\w/" => "\$1[$wordchar]", + # normal \W + "/($nonesc)\\\\W/" => "\$1[$nonwordchar]", + # \b (use negative assertions to match at end of string) + "/($nonesc)\\\\b/" => "\$1(?:(? "\$1(?:(?<=[$wordchar])(?=[$wordchar])|(?<=[$nonwordchar])(?=[$nonwordchar]))", + ); + $pattern = preg_replace( array_keys( $replaces ), array_values( $replaces ), $pattern ); + if( ! 
$p['casesensitive'] ) + $modifiers .= 'i'; + foreach( array( + 'multiline' => 'm', + 'singleline' => 's', + 'utf8' => 'u', + 'extended' => 'x', + ) as $key => $mod ) + if( $p[$key] ) + $modifiers .= $mod; + return "/" . $pattern . "/" . $modifiers; +} + +/** + * Try to match string against regex. See convertregex for additional named parameters. + * @param $pattern Regex to match against + * @param $string String to match + * @param named parameter offset_capture Set flag preg_offset_capture (returns offsets with the matches). + * @param named parameter all Return every match as array instead of first match. + * @return Matched string or false + */ +function match( $pattern, $string, $p = array() ) +{ + $flags = 0; + if( $p['offset_capture'] ) + $flags |= PREG_OFFSET_CAPTURE; + if( $p['all'] ) + $r = preg_match_all( it::convertregex( $pattern, $p ), $string, $m, $flags | PREG_PATTERN_ORDER, $p['offset'] ); + else + $r = preg_match( it::convertregex( $pattern, $p ), $string, $m, $flags, $p['offset'] ); + # no match + if( !$r ) + return false; + # no capture + else if( count( $m ) == 1 ) + return $m[0]; + # one capture + else if( count( $m ) == 2 ) + return $m[1]; + # captures, reorder pattern_order to set_order but without first element + else if( $p['all'] && !$p['pattern_order'] ) + return call_user_func_array( 'array_map', array_merge( array( null ), array_slice( $m, 1 ) ) ); + # captures, don't return first element (matched string) + else + return array_slice( $m, 1 ); +} + +/** + * Replace parts of a string matched by a pattern with the corresponding replacement string. See convertregex for named parameters. + * @param $replacements Array with patterns as keys and replacement strings as values. + * @param $string String to change. + * @return New string. 
+ */ +function replace( $replacements, $string, $p = array() ) +{ + $patterns = array(); + foreach( array_keys( $replacements ) as $pat ) + $patterns[] = it::convertregex( $pat, $p ); + return preg_replace( $patterns, array_values( $replacements ), $string, isset( $p['limit'] ) ? $p['limit'] : -1 ); +} + } ?> -- cgit v1.2.3