UTRegExpSupport.h

Go to the documentation of this file.
00001 //==================================================================================================
00002 // Copyright (C) 2010  Brian Tietz    sdbtietz at yahoo dot com
00003 //
00004 // This program is free software; you can redistribute it and/or modify it under the terms of the
00005 // GNU General Public License as published by the Free Software Foundation, version 2.0 of the
00006 // License.
00007 //
00008 // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
00009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00010 // General Public License for more details.
00011 //
00012 // You should have received a copy of the GNU General Public License along with this program; if
00013 // not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00014 // 02110-1301, USA.
00015 //
00016 // For commercial software, the copyright holder (Brian Tietz, email sdbtietz at yahoo dot com)
00017 // reserves the right and is willing to waive the proprietary source code disclosure aspects of that
00018 // license as applied to the UT library in exchange for either substantial contributions to the
00019 // development of the UT library or other forms of compensation.  Any such waiver must be
00020 // established in writing between the copyright holder and the commercial entity obtaining such a
00021 // waiver.
00022 //==================================================================================================
00023 
00024 
00025 #ifndef _UT_REG_EXP_SUPPORT_H_
00026 #define _UT_REG_EXP_SUPPORT_H_
00027 
00028 // \cond DOXYGEN_DOCUMENT_NEVER
00029 
00030 
00031 //==================================================================================================
00032 //=== Project headers
00033 //==================================================================================================
00034 #include "UT.h"
00035 
00036 
00037 //==================================================================================================
00038 //=== Constants
00039 //==================================================================================================
00040 enum regexp_normal_seq_context_t    // Parameter type of RegExpEvaluator_t::ProcessNormalSequence()
00041 {
00042     eRNSC_called_from_root,         // presumably: call made at the top level of the expression -- confirm
00043     eRNSC_called_from_aggregate     // presumably: call made while processing an aggregate -- confirm
00044 };
00045 
00046 
00047 //==================================================================================================
00048 class RegExpEvaluator_t     // Declares the regular-expression evaluator; Evaluate() is its only public member
00049 //==================================================================================================
00050 {
00051     //----------------------------------------------------------------------------------------------
00052     public:
00053     //----------------------------------------------------------------------------------------------
00054     Status_t        Evaluate(   const utf8* expression,                 // presumably the regular expression text -- confirm
00055                                 out int* expression_fail_point_chars,   // marked out; presumably failure offset within expression, in characters -- confirm
00056                                 const utf8* match,                      // presumably the text to test; NOTE(review): confirm whether NULL is accepted
00057                                 out int* match_fail_pos_chars,          // marked out; presumably failure position within match, in characters -- confirm
00058                                 bool force_start_match,                 // presumably requires the match to begin at the start of match -- confirm
00059                                 out String_t* pre,                      // marked out; presumably the text preceding the matched region -- confirm
00060                                 bool force_end_match,                   // presumably requires the match to reach the end of match -- confirm
00061                                 out String_t* post,                     // marked out; presumably the text following the matched region -- confirm
00062                                 out String_t** substring_array,         // marked out; array of String_t pointers, presumably receiving captured substrings
00063                                 int substring_count );                  // presumably the number of entries in substring_array -- confirm
00064 
00065     //----------------------------------------------------------------------------------------------
00066     private:    // Internal helper methods; only their prototypes appear in this header
00067     //----------------------------------------------------------------------------------------------
00068     Status_t                    ProcessNormalSequence(regexp_normal_seq_context_t context);     // context: eRNSC_called_from_root or eRNSC_called_from_aggregate
00069     Status_t                    ProcessEscapeSequence();
00070     Status_t                    ExtractCountControl( out int* min_count, out int* max_count );  // both parameters are marked out
00071     Status_t                    ExtractType();
00072     Status_t                    ProcessAggregate();
00073 
00074     //----------------------------------------------------------------------------------------------
00075     private:    // Data members
00076     //----------------------------------------------------------------------------------------------
00077     const utf8*                 m_expression;           // NOTE(review): presumably tracks the expression passed to Evaluate() -- confirm
00078     const utf8*                 m_match_N;              // Can be NULL
00079 };
00080 
00081 
00082 enum regexp_type_t      // Each enumerator's value is a character literal; the trailing comment shows the associated sequence
00083 {
00084     eRET_whitespace                     = 's',  // /s   (space, tab, carriage return, linefeed)
00085     eRET_numeric_digit                  = 'd',  // /d   (0-9)
00086     eRET_lowercase_hex                  = 'h',  // /h   (0-9,a-f)
00087     eRET_uppercase_hex                  = 'H',  // /H   (0-9,A-F)
00088     eRET_anycase_hex                    = 'x',  // /ih  (0-9,a-f,A-F)
00089     eRET_lowercase_letter               = 'c',  // /c   (a-z)
00090     eRET_uppercase_letter               = 'C',  // /C   (A-Z)
00091     eRET_anycase_letter                 = 'i',  // /ic  (a-z,A-Z)
00092     eRET_letter_or_above_ascii          = 'U',  // /U   (a-z,A-Z,non-ASCII UTF8)
00093     eRET_letter_number_or_above_ascii   = 'n',  // /nU  (0-9,a-z,A-Z,non-ASCII UTF8)
00094     eRET_token_character                = 't',  // /t   (a-z,A-Z,0-9,_)
00095     eRET_token_start_character          = 'T',  // /T   (a-z,A-Z,_)
00096     eRET_aggregation                    = '['   // [,]
00097 };
00098 
00099 
00100 // \endcond
00101 
00102 #endif // _UT_REG_EXP_SUPPORT_H_

Generated on Tue Dec 14 22:35:05 2010 for UT library by  doxygen 1.6.1