| Package pyparsing ::
        Module pyparsing |  | 
   1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21   
  22   
  23   
  24   
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.7" 
  62  __versionTime__ = "3 August 2012 05:00" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72   
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 'originalTextFor', 'ungroup', 
  92  ] 
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104  else: 
 105      _MAX_INT = sys.maxint 
 106      range = xrange 
 107      set = lambda s : dict( [(c,0) for c in s] ) 
 108   
 110          """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 
 111             str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 
 112             then < returns the unicode object | encodes it with the default encoding | ... >. 
 113          """ 
 114          if isinstance(obj,unicode): 
 115              return obj 
 116   
 117          try: 
 118               
 119               
 120              return str(obj) 
 121   
 122          except UnicodeEncodeError: 
 123               
 124               
 125               
 126               
 127               
 128              return unicode(obj) 
  129               
 130               
 131               
 132               
 133               
 134               
 135   
 136   
 137  singleArgBuiltins = [] 
 138  import __builtin__ 
 139  for fname in "sum len sorted reversed list tuple set any all min max".split(): 
 140      try: 
 141          singleArgBuiltins.append(getattr(__builtin__,fname)) 
 142      except AttributeError: 
 143          continue 
 144   
 146      """Escape &, <, >, ", ', etc. in a string of data.""" 
 147   
 148       
 149      from_symbols = '&><"\'' 
 150      to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 
 151      for from_,to_ in zip(from_symbols, to_symbols): 
 152          data = data.replace(from_, to_) 
 153      return data 
  154   
 157   
 158  alphas     = string.ascii_lowercase + string.ascii_uppercase 
 159  nums       = "0123456789" 
 160  hexnums    = nums + "ABCDEFabcdef" 
 161  alphanums  = alphas + nums 
 162  _bslash    = chr(92) 
 163  printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) 
 164   
 166      """base exception class for all parsing runtime exceptions""" 
 167       
 168       
 169 -    def __init__( self, pstr, loc=0, msg=None, elem=None ): 
  170          self.loc = loc 
 171          if msg is None: 
 172              self.msg = pstr 
 173              self.pstr = "" 
 174          else: 
 175              self.msg = msg 
 176              self.pstr = pstr 
 177          self.parserElement = elem 
  178   
 180          """supported attributes by name are: 
 181              - lineno - returns the line number of the exception text 
 182              - col - returns the column number of the exception text 
 183              - line - returns the line containing the exception text 
 184          """ 
 185          if( aname == "lineno" ): 
 186              return lineno( self.loc, self.pstr ) 
 187          elif( aname in ("col", "column") ): 
 188              return col( self.loc, self.pstr ) 
 189          elif( aname == "line" ): 
 190              return line( self.loc, self.pstr ) 
 191          else: 
 192              raise AttributeError(aname) 
  193   
 195          return "%s (at char %d), (line:%d, col:%d)" % \ 
 196                  ( self.msg, self.loc, self.lineno, self.column ) 
  210          return "loc msg pstr parserElement lineno col line " \ 
 211                 "markInputline __str__ __repr__".split() 
   212   
 214      """exception thrown when parse expressions don't match class; 
 215         supported attributes by name are: 
 216          - lineno - returns the line number of the exception text 
 217          - col - returns the column number of the exception text 
 218          - line - returns the line containing the exception text 
 219      """ 
 220      pass 
  221   
 223      """user-throwable exception thrown when inconsistent parse content 
 224         is found; stops all parsing immediately""" 
 225      pass 
  226   
 228      """just like C{L{ParseFatalException}}, but thrown internally when an 
 229         C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because 
 230         an unbacktrackable syntax error has been found""" 
  234   
 235   
 236       
 237          
 238           
 239           
 240          
 241          
 242          
 243          
 244       
 245           
 246           
 247   
 249      """exception thrown by C{validate()} if the grammar could be improperly recursive""" 
 250 -    def __init__( self, parseElementList ): 
  251          self.parseElementTrace = parseElementList 
  252   
 254          return "RecursiveGrammarException: %s" % self.parseElementTrace 
   255   
 262          return repr(self.tup) 
  264          self.tup = (self.tup[0],i) 
  267      """Structured parse results, to provide multiple means of access to the parsed data: 
 268         - as a list (C{len(results)}) 
 269         - by list index (C{results[0], results[1]}, etc.) 
 270         - by attribute (C{results.<resultsName>}) 
 271         """ 
 272       
 273 -    def __new__(cls, toklist, name=None, asList=True, modal=True ): 
  274          if isinstance(toklist, cls): 
 275              return toklist 
 276          retobj = object.__new__(cls) 
 277          retobj.__doinit = True 
 278          return retobj 
  279   
 280       
 281       
 282 -    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): 
  283          if self.__doinit: 
 284              self.__doinit = False 
 285              self.__name = None 
 286              self.__parent = None 
 287              self.__accumNames = {} 
 288              if isinstance(toklist, list): 
 289                  self.__toklist = toklist[:] 
 290              else: 
 291                  self.__toklist = [toklist] 
 292              self.__tokdict = dict() 
 293   
 294          if name is not None and name: 
 295              if not modal: 
 296                  self.__accumNames[name] = 0 
 297              if isinstance(name,int): 
 298                  name = _ustr(name)  
 299              self.__name = name 
 300              if not toklist in (None,'',[]): 
 301                  if isinstance(toklist,basestring): 
 302                      toklist = [ toklist ] 
 303                  if asList: 
 304                      if isinstance(toklist,ParseResults): 
 305                          self[name] = _ParseResultsWithOffset(toklist.copy(),0) 
 306                      else: 
 307                          self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 
 308                      self[name].__name = name 
 309                  else: 
 310                      try: 
 311                          self[name] = toklist[0] 
 312                      except (KeyError,TypeError,IndexError): 
 313                          self[name] = toklist 
  314   
 316          if isinstance( i, (int,slice) ): 
 317              return self.__toklist[i] 
 318          else: 
 319              if i not in self.__accumNames: 
 320                  return self.__tokdict[i][-1][0] 
 321              else: 
 322                  return ParseResults([ v[0] for v in self.__tokdict[i] ]) 
  323   
 325          if isinstance(v,_ParseResultsWithOffset): 
 326              self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 
 327              sub = v[0] 
 328          elif isinstance(k,int): 
 329              self.__toklist[k] = v 
 330              sub = v 
 331          else: 
 332              self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 
 333              sub = v 
 334          if isinstance(sub,ParseResults): 
 335              sub.__parent = wkref(self) 
  336   
 338          if isinstance(i,(int,slice)): 
 339              mylen = len( self.__toklist ) 
 340              del self.__toklist[i] 
 341   
 342               
 343              if isinstance(i, int): 
 344                  if i < 0: 
 345                      i += mylen 
 346                  i = slice(i, i+1) 
 347               
 348              removed = list(range(*i.indices(mylen))) 
 349              removed.reverse() 
 350               
 351              for name in self.__tokdict: 
 352                  occurrences = self.__tokdict[name] 
 353                  for j in removed: 
 354                      for k, (value, position) in enumerate(occurrences): 
 355                          occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 
 356          else: 
 357              del self.__tokdict[i] 
  358   
 360          return k in self.__tokdict 
  361   
 362 -    def __len__( self ): return len( self.__toklist ) 
  363 -    def __bool__(self): return len( self.__toklist ) > 0 
  364      __nonzero__ = __bool__ 
 365 -    def __iter__( self ): return iter( self.__toklist ) 
  366 -    def __reversed__( self ): return iter( self.__toklist[::-1] ) 
  368          """Returns all named result keys.""" 
 369          return self.__tokdict.keys() 
  370   
 371 -    def pop( self, index=-1 ): 
  372          """Removes and returns item at specified index (default=last). 
 373             Will work with either numeric indices or dict-key indicies.""" 
 374          ret = self[index] 
 375          del self[index] 
 376          return ret 
  377   
 378 -    def get(self, key, defaultValue=None): 
  379          """Returns named result matching the given key, or if there is no 
 380             such name, then returns the given C{defaultValue} or C{None} if no 
 381             C{defaultValue} is specified.""" 
 382          if key in self: 
 383              return self[key] 
 384          else: 
 385              return defaultValue 
  386   
 387 -    def insert( self, index, insStr ): 
  388          """Inserts new element at location index in the list of parsed tokens.""" 
 389          self.__toklist.insert(index, insStr) 
 390           
 391          for name in self.__tokdict: 
 392              occurrences = self.__tokdict[name] 
 393              for k, (value, position) in enumerate(occurrences): 
 394                  occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 
  395   
 397          """Returns all named result keys and values as a list of tuples.""" 
 398          return [(k,self[k]) for k in self.__tokdict] 
  399   
 401          """Returns all named result values.""" 
 402          return [ v[-1][0] for v in self.__tokdict.values() ] 
  403   
 405          if True:  
 406              if name in self.__tokdict: 
 407                  if name not in self.__accumNames: 
 408                      return self.__tokdict[name][-1][0] 
 409                  else: 
 410                      return ParseResults([ v[0] for v in self.__tokdict[name] ]) 
 411              else: 
 412                  return "" 
 413          return None 
  414   
 416          ret = self.copy() 
 417          ret += other 
 418          return ret 
  419   
 421          if other.__tokdict: 
 422              offset = len(self.__toklist) 
 423              addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 
 424              otheritems = other.__tokdict.items() 
 425              otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 
 426                                  for (k,vlist) in otheritems for v in vlist] 
 427              for k,v in otherdictitems: 
 428                  self[k] = v 
 429                  if isinstance(v[0],ParseResults): 
 430                      v[0].__parent = wkref(self) 
 431               
 432          self.__toklist += other.__toklist 
 433          self.__accumNames.update( other.__accumNames ) 
 434          return self 
  435   
 437          if isinstance(other,int) and other == 0: 
 438              return self.copy() 
  439           
 441          return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 
  442   
 444          out = [] 
 445          for i in self.__toklist: 
 446              if isinstance(i, ParseResults): 
 447                  out.append(_ustr(i)) 
 448              else: 
 449                  out.append(repr(i)) 
 450          return '[' + ', '.join(out) + ']' 
  451   
 453          out = [] 
 454          for item in self.__toklist: 
 455              if out and sep: 
 456                  out.append(sep) 
 457              if isinstance( item, ParseResults ): 
 458                  out += item._asStringList() 
 459              else: 
 460                  out.append( _ustr(item) ) 
 461          return out 
  462   
  464          """Returns the parse results as a nested list of matching tokens; nested C{ParseResults} are converted to lists and all other tokens are returned unchanged.""" 
 465          out = [] 
 466          for res in self.__toklist: 
 467              if isinstance(res,ParseResults): 
 468                  out.append( res.asList() ) 
 469              else: 
 470                  out.append( res ) 
 471          return out 
  472   
 474          """Returns the named parse results as dictionary.""" 
 475          return dict( self.items() ) 
  476   
 478          """Returns a new copy of a C{ParseResults} object.""" 
 479          ret = ParseResults( self.__toklist ) 
 480          ret.__tokdict = self.__tokdict.copy() 
 481          ret.__parent = self.__parent 
 482          ret.__accumNames.update( self.__accumNames ) 
 483          ret.__name = self.__name 
 484          return ret 
  485   
 486 -    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 
  487          """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" 
 488          nl = "\n" 
 489          out = [] 
 490          namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() 
 491                                                              for v in vlist ] ) 
 492          nextLevelIndent = indent + "  " 
 493   
 494           
 495          if not formatted: 
 496              indent = "" 
 497              nextLevelIndent = "" 
 498              nl = "" 
 499   
 500          selfTag = None 
 501          if doctag is not None: 
 502              selfTag = doctag 
 503          else: 
 504              if self.__name: 
 505                  selfTag = self.__name 
 506   
 507          if not selfTag: 
 508              if namedItemsOnly: 
 509                  return "" 
 510              else: 
 511                  selfTag = "ITEM" 
 512   
 513          out += [ nl, indent, "<", selfTag, ">" ] 
 514   
 515          worklist = self.__toklist 
 516          for i,res in enumerate(worklist): 
 517              if isinstance(res,ParseResults): 
 518                  if i in namedItems: 
 519                      out += [ res.asXML(namedItems[i], 
 520                                          namedItemsOnly and doctag is None, 
 521                                          nextLevelIndent, 
 522                                          formatted)] 
 523                  else: 
 524                      out += [ res.asXML(None, 
 525                                          namedItemsOnly and doctag is None, 
 526                                          nextLevelIndent, 
 527                                          formatted)] 
 528              else: 
 529                   
 530                  resTag = None 
 531                  if i in namedItems: 
 532                      resTag = namedItems[i] 
 533                  if not resTag: 
 534                      if namedItemsOnly: 
 535                          continue 
 536                      else: 
 537                          resTag = "ITEM" 
 538                  xmlBodyText = _xml_escape(_ustr(res)) 
 539                  out += [ nl, nextLevelIndent, "<", resTag, ">", 
 540                                                  xmlBodyText, 
 541                                                  "</", resTag, ">" ] 
 542   
 543          out += [ nl, indent, "</", selfTag, ">" ] 
 544          return "".join(out) 
  545   
 547          for k,vlist in self.__tokdict.items(): 
 548              for v,loc in vlist: 
 549                  if sub is v: 
 550                      return k 
 551          return None 
  552   
 554          """Returns the results name for this token expression.""" 
 555          if self.__name: 
 556              return self.__name 
 557          elif self.__parent: 
 558              par = self.__parent() 
 559              if par: 
 560                  return par.__lookup(self) 
 561              else: 
 562                  return None 
 563          elif (len(self) == 1 and 
 564                 len(self.__tokdict) == 1 and 
 565                 self.__tokdict.values()[0][0][1] in (0,-1)): 
 566              return self.__tokdict.keys()[0] 
 567          else: 
 568              return None 
  569   
 570 -    def dump(self,indent='',depth=0): 
  571          """Diagnostic method for listing out the contents of a C{ParseResults}. 
 572             Accepts an optional C{indent} argument so that this string can be embedded 
 573             in a nested display of other data.""" 
 574          out = [] 
 575          out.append( indent+_ustr(self.asList()) ) 
 576          keys = self.items() 
 577          keys.sort() 
 578          for k,v in keys: 
 579              if out: 
 580                  out.append('\n') 
 581              out.append( "%s%s- %s: " % (indent,('  '*depth), k) ) 
 582              if isinstance(v,ParseResults): 
 583                  if v.keys(): 
 584                      out.append( v.dump(indent,depth+1) ) 
 585                  else: 
 586                      out.append(_ustr(v)) 
 587              else: 
 588                  out.append(_ustr(v)) 
 589          return "".join(out) 
  590   
 591       
 593          return ( self.__toklist, 
 594                   ( self.__tokdict.copy(), 
 595                     self.__parent is not None and self.__parent() or None, 
 596                     self.__accumNames, 
 597                     self.__name ) ) 
  598   
 600          self.__toklist = state[0] 
 601          (self.__tokdict, 
 602           par, 
 603           inAccumNames, 
 604           self.__name) = state[1] 
 605          self.__accumNames = {} 
 606          self.__accumNames.update(inAccumNames) 
 607          if par is not None: 
 608              self.__parent = wkref(par) 
 609          else: 
 610              self.__parent = None 
  611   
  614   
 616      """Returns current column within a string, counting newlines as line separators. 
 617     The first column is number 1. 
 618   
 619     Note: the default parsing behavior is to expand tabs in the input string 
 620     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 621     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 622     consistent view of the parsed string, the parse location, and line and column 
 623     positions within the parsed string. 
 624     """ 
 625      return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 
  626   
 628      """Returns current line number within a string, counting newlines as line separators. 
 629     The first line is number 1. 
 630   
 631     Note: the default parsing behavior is to expand tabs in the input string 
 632     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 633     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 634     consistent view of the parsed string, the parse location, and line and column 
 635     positions within the parsed string. 
 636     """ 
 637      return strg.count("\n",0,loc) + 1 
  638   
 639 -def line( loc, strg ): 
  640      """Returns the line of text containing loc within a string, counting newlines as line separators. 
 641         """ 
 642      lastCR = strg.rfind("\n", 0, loc) 
 643      nextCR = strg.find("\n", loc) 
 644      if nextCR >= 0: 
 645          return strg[lastCR+1:nextCR] 
 646      else: 
 647          return strg[lastCR+1:] 
  648   
 650      print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 
  651   
 653      print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 
  654   
 656      print ("Exception raised:" + _ustr(exc)) 
  657   
 659      """'Do-nothing' debug action, to suppress debugging output during parsing.""" 
 660      pass 
  661   
 662  'decorator to trim function calls to match the arity of the target' 
 663  if not _PY3K: 
 665          if func in singleArgBuiltins: 
 666              return lambda s,l,t: func(t) 
 667          limit = [0] 
 668          def wrapper(*args): 
 669              while 1: 
 670                  try: 
 671                      return func(*args[limit[0]:]) 
 672                  except TypeError: 
 673                      if limit[0] <= maxargs: 
 674                          limit[0] += 1 
 675                          continue 
 676                      raise 
  677          return wrapper 
 678  else: 
 680          if func in singleArgBuiltins: 
 681              return lambda s,l,t: func(t) 
 682          limit = maxargs 
 683          def wrapper(*args): 
 684               
 685              while 1: 
 686                  try: 
 687                      return func(*args[limit:]) 
 688                  except TypeError: 
 689                      if limit: 
 690                          limit -= 1 
 691                          continue 
 692                      raise 
  693          return wrapper 
 694       
 696      """Abstract base level parser element class.""" 
 697      DEFAULT_WHITE_CHARS = " \n\t\r" 
 698      verbose_stacktrace = False 
 699   
 704      setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 
 705   
 707          """ 
 708          Set class to be used for inclusion of string literals into a parser. 
 709          """ 
 710          ParserElement.literalStringClass = cls 
  711      inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) 
 712   
 714          self.parseAction = list() 
 715          self.failAction = None 
 716           
 717          self.strRepr = None 
 718          self.resultsName = None 
 719          self.saveAsList = savelist 
 720          self.skipWhitespace = True 
 721          self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 722          self.copyDefaultWhiteChars = True 
 723          self.mayReturnEmpty = False  
 724          self.keepTabs = False 
 725          self.ignoreExprs = list() 
 726          self.debug = False 
 727          self.streamlined = False 
 728          self.mayIndexError = True  
 729          self.errmsg = "" 
 730          self.modalResults = True  
 731          self.debugActions = ( None, None, None )  
 732          self.re = None 
 733          self.callPreparse = True  
 734          self.callDuringTry = False 
  735   
 737          """Make a copy of this C{ParserElement}.  Useful for defining different parse actions 
 738             for the same parsing pattern, using copies of the original parse element.""" 
 739          cpy = copy.copy( self ) 
 740          cpy.parseAction = self.parseAction[:] 
 741          cpy.ignoreExprs = self.ignoreExprs[:] 
 742          if self.copyDefaultWhiteChars: 
 743              cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 744          return cpy 
  745   
 747          """Define name for this expression, for use in debugging.""" 
 748          self.name = name 
 749          self.errmsg = "Expected " + self.name 
 750          if hasattr(self,"exception"): 
 751              self.exception.msg = self.errmsg 
 752          return self 
  753   
 755          """Define name for referencing matching tokens as a nested attribute 
 756             of the returned parse results. 
 757             NOTE: this returns a *copy* of the original C{ParserElement} object; 
 758             this is so that the client can define a basic element, such as an 
 759             integer, and reference it in multiple places with different names. 
 760              
 761             You can also set results names using the abbreviated syntax, 
 762             C{expr("name")} in place of C{expr.setResultsName("name")} -  
 763             see L{I{__call__}<__call__>}. 
 764          """ 
 765          newself = self.copy() 
 766          if name.endswith("*"): 
 767              name = name[:-1] 
 768              listAllMatches=True 
 769          newself.resultsName = name 
 770          newself.modalResults = not listAllMatches 
 771          return newself 
  772   
 774          """Method to invoke the Python pdb debugger when this element is 
 775             about to be parsed. Set C{breakFlag} to True to enable, False to 
 776             disable. 
 777          """ 
 778          if breakFlag: 
 779              _parseMethod = self._parse 
 780              def breaker(instring, loc, doActions=True, callPreParse=True): 
 781                  import pdb 
 782                  pdb.set_trace() 
 783                  return _parseMethod( instring, loc, doActions, callPreParse ) 
  784              breaker._originalParseMethod = _parseMethod 
 785              self._parse = breaker 
 786          else: 
 787              if hasattr(self._parse,"_originalParseMethod"): 
 788                  self._parse = self._parse._originalParseMethod 
 789          return self 
  790   
 792          """Define action to perform when successfully matching parse element definition. 
 793             Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 
 794             C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 
 795              - s   = the original string being parsed (see note below) 
 796              - loc = the location of the matching substring 
 797              - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object 
 798             If the functions in fns modify the tokens, they can return them as the return 
 799             value from fn, and the modified list of tokens will replace the original. 
 800             Otherwise, fn does not need to return any value. 
 801   
 802             Note: the default parsing behavior is to expand tabs in the input string 
 803             before starting the parsing process.  See L{I{parseString}<parseString>} for more information 
 804             on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 805             consistent view of the parsed string, the parse location, and line and column 
 806             positions within the parsed string. 
 807             """ 
 808          self.parseAction = list(map(_trim_arity, list(fns))) 
 809          self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 810          return self 
  811   
 813          """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 
 814          self.parseAction += list(map(_trim_arity, list(fns))) 
 815          self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 816          return self 
  817   
 819          """Define action to perform if parsing fails at this expression. 
  820             Fail action fn is a callable function that takes the arguments 
 821             C{fn(s,loc,expr,err)} where: 
 822              - s = string being parsed 
 823              - loc = location where expression match was attempted and failed 
 824              - expr = the parse expression that failed 
 825              - err = the exception thrown 
 826             The function returns no value.  It may throw C{L{ParseFatalException}} 
 827             if it is desired to stop parsing immediately.""" 
 828          self.failAction = fn 
 829          return self 
  830   
 832          exprsFound = True 
 833          while exprsFound: 
 834              exprsFound = False 
 835              for e in self.ignoreExprs: 
 836                  try: 
 837                      while 1: 
 838                          loc,dummy = e._parse( instring, loc ) 
 839                          exprsFound = True 
 840                  except ParseException: 
 841                      pass 
 842          return loc 
  843   
 845          if self.ignoreExprs: 
 846              loc = self._skipIgnorables( instring, loc ) 
 847   
 848          if self.skipWhitespace: 
 849              wt = self.whiteChars 
 850              instrlen = len(instring) 
 851              while loc < instrlen and instring[loc] in wt: 
 852                  loc += 1 
 853   
 854          return loc 
  855   
 856 -    def parseImpl( self, instring, loc, doActions=True ): 
  858   
 859 -    def postParse( self, instring, loc, tokenlist ): 
  861   
 862       
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Core, uncached parse step for this element.

           Runs, in order: optional C{preParse}, C{parseImpl}, C{postParse}, and
           then any attached parse actions, and returns C{(loc, retTokens)} where
           C{loc} is the location returned by C{parseImpl} and C{retTokens} is a
           C{ParseResults}.

            - instring     = the string being parsed
            - loc          = location at which to attempt the match
            - doActions    = if false, parse actions only run when C{callDuringTry} is set
            - callPreParse = if false, C{preParse} is not called and matching starts at C{loc}

           An C{IndexError} escaping from C{parseImpl} is converted into a
           C{ParseException} positioned at the end of C{instring}.  When debugging
           is enabled or a fail action is set, the debug actions and the fail
           action are invoked around the match attempt and the original exception
           is re-raised.
           """
        debugging = ( self.debug ) 

        # Slow path: debug hooks and/or a fail action need to observe the attempt.
        if debugging or self.failAction:
            
            # debugActions[0] is the "start" action (see setDebugActions)
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # location reported to parse actions, debug actions and the fail action
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    # running off the end of the input is reported as a parse failure
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException:
                
                # fetch the active exception lazily, and at most once
                err = None
                # debugActions[2] is the "exception" action
                if self.debugActions[2]:
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    if err is None:
                        err = sys.exc_info()[1]
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            # Fast path: no hooks to notify.
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # Only pay for the IndexError guard when the element declares it may
            # index past the end, or when loc is already at/after the end of input.
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action that returns a value replaces the current results
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException:
                    
                    # report a failure raised from inside a parse action, then propagate it
                    if (self.debugActions[2] ):
                        err = sys.exc_info()[1]
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                # same loop as above, without the debug exception hook
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            
            # debugActions[1] is the "success" action
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
  939   
 945   
 946       
 947       
 948 -    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 
  949          lookup = (self,instring,loc,callPreParse,doActions) 
 950          if lookup in ParserElement._exprArgCache: 
 951              value = ParserElement._exprArgCache[ lookup ] 
 952              if isinstance(value, Exception): 
 953                  raise value 
 954              return (value[0],value[1].copy()) 
 955          else: 
 956              try: 
 957                  value = self._parseNoCache( instring, loc, doActions, callPreParse ) 
 958                  ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) 
 959                  return value 
 960              except ParseBaseException: 
 961                  pe = sys.exc_info()[1] 
 962                  ParserElement._exprArgCache[ lookup ] = pe 
 963                  raise 
  964   
 965      _parse = _parseNoCache 
 966   
 967       
 968      _exprArgCache = {} 
 971      resetCache = staticmethod(resetCache) 
 972   
 973      _packratEnabled = False 
 975          """Enables "packrat" parsing, which adds memoizing to the parsing logic. 
 976             Repeated parse attempts at the same string location (which happens 
 977             often in many complex grammars) can immediately return a cached value, 
 978             instead of re-executing parsing/validating code.  Memoizing is done of 
 979             both valid results and parsing exceptions. 
 980   
 981             This speedup may break existing programs that use parse actions that 
 982             have side-effects.  For this reason, packrat parsing is disabled when 
 983             you first import pyparsing.  To activate the packrat feature, your 
 984             program must call the class method C{ParserElement.enablePackrat()}.  If 
 985             your program uses C{psyco} to "compile as you go", you must call 
 986             C{enablePackrat} before calling C{psyco.full()}.  If you do not do this, 
 987             Python will crash.  For best results, call C{enablePackrat()} immediately 
 988             after importing pyparsing. 
 989          """ 
 990          if not ParserElement._packratEnabled: 
 991              ParserElement._packratEnabled = True 
 992              ParserElement._parse = ParserElement._parseCache 
  993      enablePackrat = staticmethod(enablePackrat) 
 994   
 996          """Execute the parse expression with the given string. 
 997             This is the main interface to the client code, once the complete 
 998             expression has been built. 
 999   
1000             If you want the grammar to require that the entire input string be 
1001             successfully parsed, then set C{parseAll} to True (equivalent to ending 
1002             the grammar with C{L{StringEnd()}}). 
1003   
1004             Note: C{parseString} implicitly calls C{expandtabs()} on the input string, 
1005             in order to report proper column numbers in parse actions. 
1006             If the input string contains tabs and 
1007             the grammar uses parse actions that use the C{loc} argument to index into the 
1008             string being parsed, you can ensure you have a consistent view of the input 
1009             string by: 
1010              - calling C{parseWithTabs} on your grammar before calling C{parseString} 
1011                (see L{I{parseWithTabs}<parseWithTabs>}) 
1012              - define your parse action using the full C{(s,loc,toks)} signature, and 
1013                reference the input string using the parse action's C{s} argument 
 1014              - explicitly expand the tabs in your input string before calling 
1015                C{parseString} 
1016          """ 
1017          ParserElement.resetCache() 
1018          if not self.streamlined: 
1019              self.streamline() 
1020               
1021          for e in self.ignoreExprs: 
1022              e.streamline() 
1023          if not self.keepTabs: 
1024              instring = instring.expandtabs() 
1025          try: 
1026              loc, tokens = self._parse( instring, 0 ) 
1027              if parseAll: 
1028                  loc = self.preParse( instring, loc ) 
1029                  se = Empty() + StringEnd() 
1030                  se._parse( instring, loc ) 
1031          except ParseBaseException: 
1032              if ParserElement.verbose_stacktrace: 
1033                  raise 
1034              else: 
1035                   
1036                  exc = sys.exc_info()[1] 
1037                  raise exc 
1038          else: 
1039              return tokens 
 1040   
1042          """Scan the input string for expression matches.  Each match will return the 
1043             matching tokens, start location, and end location.  May be called with optional 
1044             C{maxMatches} argument, to clip scanning after 'n' matches are found.  If 
1045             C{overlap} is specified, then overlapping matches will be reported. 
1046   
1047             Note that the start and end locations are reported relative to the string 
1048             being parsed.  See L{I{parseString}<parseString>} for more information on parsing 
1049             strings with embedded tabs.""" 
1050          if not self.streamlined: 
1051              self.streamline() 
1052          for e in self.ignoreExprs: 
1053              e.streamline() 
1054   
1055          if not self.keepTabs: 
1056              instring = _ustr(instring).expandtabs() 
1057          instrlen = len(instring) 
1058          loc = 0 
1059          preparseFn = self.preParse 
1060          parseFn = self._parse 
1061          ParserElement.resetCache() 
1062          matches = 0 
1063          try: 
1064              while loc <= instrlen and matches < maxMatches: 
1065                  try: 
1066                      preloc = preparseFn( instring, loc ) 
1067                      nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 
1068                  except ParseException: 
1069                      loc = preloc+1 
1070                  else: 
1071                      if nextLoc > loc: 
1072                          matches += 1 
1073                          yield tokens, preloc, nextLoc 
1074                          if overlap: 
1075                              nextloc = preparseFn( instring, loc ) 
1076                              if nextloc > loc: 
1077                                  loc = nextLoc 
1078                              else: 
1079                                  loc += 1 
1080                          else: 
1081                              loc = nextLoc 
1082                      else: 
1083                          loc = preloc+1 
1084          except ParseBaseException: 
1085              if ParserElement.verbose_stacktrace: 
1086                  raise 
1087              else: 
1088                   
1089                  exc = sys.exc_info()[1] 
1090                  raise exc 
 1091   
1125   
1127          """Another extension to C{L{scanString}}, simplifying the access to the tokens found 
1128             to match the given parse expression.  May be called with optional 
1129             C{maxMatches} argument, to clip searching after 'n' matches are found. 
1130          """ 
1131          try: 
1132              return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 
1133          except ParseBaseException: 
1134              if ParserElement.verbose_stacktrace: 
1135                  raise 
1136              else: 
1137                   
1138                  exc = sys.exc_info()[1] 
1139                  raise exc 
 1140   
1142          """Implementation of + operator - returns C{L{And}}""" 
1143          if isinstance( other, basestring ): 
1144              other = ParserElement.literalStringClass( other ) 
1145          if not isinstance( other, ParserElement ): 
1146              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1147                      SyntaxWarning, stacklevel=2) 
1148              return None 
1149          return And( [ self, other ] ) 
 1150   
1152          """Implementation of + operator when left operand is not a C{L{ParserElement}}""" 
1153          if isinstance( other, basestring ): 
1154              other = ParserElement.literalStringClass( other ) 
1155          if not isinstance( other, ParserElement ): 
1156              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1157                      SyntaxWarning, stacklevel=2) 
1158              return None 
1159          return other + self 
 1160   
1162          """Implementation of - operator, returns C{L{And}} with error stop""" 
1163          if isinstance( other, basestring ): 
1164              other = ParserElement.literalStringClass( other ) 
1165          if not isinstance( other, ParserElement ): 
1166              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1167                      SyntaxWarning, stacklevel=2) 
1168              return None 
1169          return And( [ self, And._ErrorStop(), other ] ) 
 1170   
1172          """Implementation of - operator when left operand is not a C{L{ParserElement}}""" 
1173          if isinstance( other, basestring ): 
1174              other = ParserElement.literalStringClass( other ) 
1175          if not isinstance( other, ParserElement ): 
1176              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1177                      SyntaxWarning, stacklevel=2) 
1178              return None 
1179          return other - self 
 1180   
1182          """Implementation of * operator, allows use of C{expr * 3} in place of 
 1183             C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer 
1184             tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples 
1185             may also include C{None} as in: 
1186              - C{expr*(n,None)} or C{expr*(n,)} is equivalent 
1187                to C{expr*n + L{ZeroOrMore}(expr)} 
1188                (read as "at least n instances of C{expr}") 
1189              - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 
1190                (read as "0 to n instances of C{expr}") 
1191              - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 
1192              - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 
1193   
1194             Note that C{expr*(None,n)} does not raise an exception if 
1195             more than n exprs exist in the input stream; that is, 
1196             C{expr*(None,n)} does not enforce a maximum number of expr 
1197             occurrences.  If this behavior is desired, then write 
1198             C{expr*(None,n) + ~expr} 
1199   
1200          """ 
1201          if isinstance(other,int): 
1202              minElements, optElements = other,0 
1203          elif isinstance(other,tuple): 
1204              other = (other + (None, None))[:2] 
1205              if other[0] is None: 
1206                  other = (0, other[1]) 
1207              if isinstance(other[0],int) and other[1] is None: 
1208                  if other[0] == 0: 
1209                      return ZeroOrMore(self) 
1210                  if other[0] == 1: 
1211                      return OneOrMore(self) 
1212                  else: 
1213                      return self*other[0] + ZeroOrMore(self) 
1214              elif isinstance(other[0],int) and isinstance(other[1],int): 
1215                  minElements, optElements = other 
1216                  optElements -= minElements 
1217              else: 
1218                  raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 
1219          else: 
1220              raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 
1221   
1222          if minElements < 0: 
1223              raise ValueError("cannot multiply ParserElement by negative value") 
1224          if optElements < 0: 
1225              raise ValueError("second tuple value must be greater or equal to first tuple value") 
1226          if minElements == optElements == 0: 
1227              raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 
1228   
1229          if (optElements): 
1230              def makeOptionalList(n): 
1231                  if n>1: 
1232                      return Optional(self + makeOptionalList(n-1)) 
1233                  else: 
1234                      return Optional(self) 
 1235              if minElements: 
1236                  if minElements == 1: 
1237                      ret = self + makeOptionalList(optElements) 
1238                  else: 
1239                      ret = And([self]*minElements) + makeOptionalList(optElements) 
1240              else: 
1241                  ret = makeOptionalList(optElements) 
1242          else: 
1243              if minElements == 1: 
1244                  ret = self 
1245              else: 
1246                  ret = And([self]*minElements) 
1247          return ret 
1248   
1251   
1253          """Implementation of | operator - returns C{L{MatchFirst}}""" 
1254          if isinstance( other, basestring ): 
1255              other = ParserElement.literalStringClass( other ) 
1256          if not isinstance( other, ParserElement ): 
1257              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1258                      SyntaxWarning, stacklevel=2) 
1259              return None 
1260          return MatchFirst( [ self, other ] ) 
 1261   
1263          """Implementation of | operator when left operand is not a C{L{ParserElement}}""" 
1264          if isinstance( other, basestring ): 
1265              other = ParserElement.literalStringClass( other ) 
1266          if not isinstance( other, ParserElement ): 
1267              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1268                      SyntaxWarning, stacklevel=2) 
1269              return None 
1270          return other | self 
 1271   
1273          """Implementation of ^ operator - returns C{L{Or}}""" 
1274          if isinstance( other, basestring ): 
1275              other = ParserElement.literalStringClass( other ) 
1276          if not isinstance( other, ParserElement ): 
1277              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1278                      SyntaxWarning, stacklevel=2) 
1279              return None 
1280          return Or( [ self, other ] ) 
 1281   
1283          """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" 
1284          if isinstance( other, basestring ): 
1285              other = ParserElement.literalStringClass( other ) 
1286          if not isinstance( other, ParserElement ): 
1287              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1288                      SyntaxWarning, stacklevel=2) 
1289              return None 
1290          return other ^ self 
 1291   
1293          """Implementation of & operator - returns C{L{Each}}""" 
1294          if isinstance( other, basestring ): 
1295              other = ParserElement.literalStringClass( other ) 
1296          if not isinstance( other, ParserElement ): 
1297              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1298                      SyntaxWarning, stacklevel=2) 
1299              return None 
1300          return Each( [ self, other ] ) 
 1301   
1303          """Implementation of & operator when left operand is not a C{L{ParserElement}}""" 
1304          if isinstance( other, basestring ): 
1305              other = ParserElement.literalStringClass( other ) 
1306          if not isinstance( other, ParserElement ): 
1307              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1308                      SyntaxWarning, stacklevel=2) 
1309              return None 
1310          return other & self 
 1311   
1313          """Implementation of ~ operator - returns C{L{NotAny}}""" 
1314          return NotAny( self ) 
 1315   
1317          """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 
1318               userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 
1319             could be written as:: 
1320               userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 
1321                
1322             If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 
1323             passed as C{True}. 
1324             """ 
1325          return self.setResultsName(name) 
 1326   
1328          """Suppresses the output of this C{ParserElement}; useful to keep punctuation from 
1329             cluttering up returned output. 
1330          """ 
1331          return Suppress( self ) 
 1332   
1334          """Disables the skipping of whitespace before matching the characters in the 
1335             C{ParserElement}'s defined pattern.  This is normally only used internally by 
1336             the pyparsing module, but may be needed in some whitespace-sensitive grammars. 
1337          """ 
1338          self.skipWhitespace = False 
1339          return self 
 1340   
1342          """Overrides the default whitespace chars 
1343          """ 
1344          self.skipWhitespace = True 
1345          self.whiteChars = chars 
1346          self.copyDefaultWhiteChars = False 
1347          return self 
 1348   
1350          """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string. 
1351             Must be called before C{parseString} when the input grammar contains elements that 
1352             match C{<TAB>} characters.""" 
1353          self.keepTabs = True 
1354          return self 
 1355   
1357          """Define expression to be ignored (e.g., comments) while doing pattern 
1358             matching; may be called repeatedly, to define multiple comment or other 
1359             ignorable patterns. 
1360          """ 
1361          if isinstance( other, Suppress ): 
1362              if other not in self.ignoreExprs: 
1363                  self.ignoreExprs.append( other.copy() ) 
1364          else: 
1365              self.ignoreExprs.append( Suppress( other.copy() ) ) 
1366          return self 
 1367   
1368 -    def setDebugActions( self, startAction, successAction, exceptionAction ): 
 1369          """Enable display of debugging messages while doing pattern matching.""" 
1370          self.debugActions = (startAction or _defaultStartDebugAction, 
1371                               successAction or _defaultSuccessDebugAction, 
1372                               exceptionAction or _defaultExceptionDebugAction) 
1373          self.debug = True 
1374          return self 
 1375   
1377          """Enable display of debugging messages while doing pattern matching. 
1378             Set C{flag} to True to enable, False to disable.""" 
1379          if flag: 
1380              self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 
1381          else: 
1382              self.debug = False 
1383          return self 
 1384   
1387   
1390   
1392          self.streamlined = True 
1393          self.strRepr = None 
1394          return self 
 1395   
1398   
1399 -    def validate( self, validateTrace=[] ): 
 1400          """Check defined expressions for valid structure, check for infinite recursive definitions.""" 
1401          self.checkRecursion( [] ) 
 1402   
1403 -    def parseFile( self, file_or_filename, parseAll=False ): 
 1404          """Execute the parse expression on the given file or filename. 
1405             If a filename is specified (instead of a file object), 
1406             the entire file is opened, read, and closed before parsing. 
1407          """ 
1408          try: 
1409              file_contents = file_or_filename.read() 
1410          except AttributeError: 
1411              f = open(file_or_filename, "r") 
1412              file_contents = f.read() 
1413              f.close() 
1414          try: 
1415              return self.parseString(file_contents, parseAll) 
1416          except ParseBaseException: 
1417               
1418              exc = sys.exc_info()[1] 
1419              raise exc 
 1420   
1423   
1425          if aname == "myException": 
1426              self.myException = ret = self.getException(); 
1427              return ret; 
1428          else: 
1429              raise AttributeError("no such attribute " + aname) 
 1430   
1432          if isinstance(other, ParserElement): 
1433              return self is other or self.__dict__ == other.__dict__ 
1434          elif isinstance(other, basestring): 
1435              try: 
1436                  self.parseString(_ustr(other), parseAll=True) 
1437                  return True 
1438              except ParseBaseException: 
1439                  return False 
1440          else: 
1441              return super(ParserElement,self)==other 
 1442   
1444          return not (self == other) 
 1445   
1447          return hash(id(self)) 
 1448   
1450          return self == other 
 1451   
1453          return not (self == other) 
 1454   
1455   
1456 -class Token(ParserElement): 
 1457      """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" 
1460   
1462          s = super(Token,self).setName(name) 
1463          self.errmsg = "Expected " + self.name 
1464          return s 
  1465   
1466   
1468      """An empty token, will always match.""" 
1470          super(Empty,self).__init__() 
1471          self.name = "Empty" 
1472          self.mayReturnEmpty = True 
1473          self.mayIndexError = False 
  1474   
1475   
1477      """A token that will never match.""" 
1479          super(NoMatch,self).__init__() 
1480          self.name = "NoMatch" 
1481          self.mayReturnEmpty = True 
1482          self.mayIndexError = False 
1483          self.errmsg = "Unmatchable token" 
 1484   
1485 -    def parseImpl( self, instring, loc, doActions=True ): 
 1486          exc = self.myException 
1487          exc.loc = loc 
1488          exc.pstr = instring 
1489          raise exc 
  1490   
1491   
1493      """Token to exactly match a specified string.""" 
1495          super(Literal,self).__init__() 
1496          self.match = matchString 
1497          self.matchLen = len(matchString) 
1498          try: 
1499              self.firstMatchChar = matchString[0] 
1500          except IndexError: 
1501              warnings.warn("null string passed to Literal; use Empty() instead", 
1502                              SyntaxWarning, stacklevel=2) 
1503              self.__class__ = Empty 
1504          self.name = '"%s"' % _ustr(self.match) 
1505          self.errmsg = "Expected " + self.name 
1506          self.mayReturnEmpty = False 
1507          self.mayIndexError = False 
 1508   
1509       
1510       
1511       
1512       
1513 -    def parseImpl( self, instring, loc, doActions=True ): 
 1514          if (instring[loc] == self.firstMatchChar and 
1515              (self.matchLen==1 or instring.startswith(self.match,loc)) ): 
1516              return loc+self.matchLen, self.match 
1517           
1518          exc = self.myException 
1519          exc.loc = loc 
1520          exc.pstr = instring 
1521          raise exc 
# Short module-level alias for the Literal class.
_L = Literal
# Class used by the ParserElement operator methods to wrap a plain string operand.
ParserElement.literalStringClass = Literal
1524   
1526      """Token to exactly match a specified string as a keyword, that is, it must be 
1527         immediately followed by a non-keyword character.  Compare with C{L{Literal}}:: 
1528           Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. 
1529           Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} 
1530         Accepts two optional constructor arguments in addition to the keyword string: 
1531         C{identChars} is a string of characters that would be valid identifier characters, 
1532         defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive 
1533         matching, default is C{False}. 
1534      """ 
1535      DEFAULT_KEYWORD_CHARS = alphanums+"_$" 
1536   
1538          super(Keyword,self).__init__() 
1539          self.match = matchString 
1540          self.matchLen = len(matchString) 
1541          try: 
1542              self.firstMatchChar = matchString[0] 
1543          except IndexError: 
1544              warnings.warn("null string passed to Keyword; use Empty() instead", 
1545                              SyntaxWarning, stacklevel=2) 
1546          self.name = '"%s"' % self.match 
1547          self.errmsg = "Expected " + self.name 
1548          self.mayReturnEmpty = False 
1549          self.mayIndexError = False 
1550          self.caseless = caseless 
1551          if caseless: 
1552              self.caselessmatch = matchString.upper() 
1553              identChars = identChars.upper() 
1554          self.identChars = set(identChars) 
 1555   
1556 -    def parseImpl( self, instring, loc, doActions=True ): 
 1557          if self.caseless: 
1558              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1559                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 
1560                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 
1561                  return loc+self.matchLen, self.match 
1562          else: 
1563              if (instring[loc] == self.firstMatchChar and 
1564                  (self.matchLen==1 or instring.startswith(self.match,loc)) and 
1565                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 
1566                  (loc == 0 or instring[loc-1] not in self.identChars) ): 
1567                  return loc+self.matchLen, self.match 
1568           
1569          exc = self.myException 
1570          exc.loc = loc 
1571          exc.pstr = instring 
1572          raise exc 
 1573   
1578   
1583      setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) 
 1584   
1586      """Token to match a specified string, ignoring case of letters. 
1587         Note: the matched results will always be in the case of the given 
1588         match string, NOT the case of the input text. 
1589      """ 
1591          super(CaselessLiteral,self).__init__( matchString.upper() ) 
1592           
1593          self.returnString = matchString 
1594          self.name = "'%s'" % self.returnString 
1595          self.errmsg = "Expected " + self.name 
 1596   
1597 -    def parseImpl( self, instring, loc, doActions=True ): 
 1598          if instring[ loc:loc+self.matchLen ].upper() == self.match: 
1599              return loc+self.matchLen, self.returnString 
1600           
1601          exc = self.myException 
1602          exc.loc = loc 
1603          exc.pstr = instring 
1604          raise exc 
  1605   
1609   
1610 -    def parseImpl( self, instring, loc, doActions=True ): 
 1611          if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1612               (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): 
1613              return loc+self.matchLen, self.match 
1614           
1615          exc = self.myException 
1616          exc.loc = loc 
1617          exc.pstr = instring 
1618          raise exc 
  1619   
1621      """Token for matching words composed of allowed character sets. 
1622         Defined with string containing all allowed initial characters, 
1623         an optional string containing allowed body characters (if omitted, 
1624         defaults to the initial character set), and an optional minimum, 
1625         maximum, and/or exact length.  The default value for C{min} is 1 (a 
1626         minimum value < 1 is not valid); the default values for C{max} and C{exact} 
1627         are 0, meaning no maximum or exact length restriction. An optional 
1628         C{excludeChars} parameter can list characters that might be found in  
1629         the input C{bodyChars} string; useful to define a word of all printables 
1630         except for one or two characters, for instance. 
1631      """ 
1632 -    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 
 1633          super(Word,self).__init__() 
1634          if excludeChars: 
1635              initChars = ''.join([c for c in initChars if c not in excludeChars]) 
1636              if bodyChars: 
1637                  bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) 
1638          self.initCharsOrig = initChars 
1639          self.initChars = set(initChars) 
1640          if bodyChars : 
1641              self.bodyCharsOrig = bodyChars 
1642              self.bodyChars = set(bodyChars) 
1643          else: 
1644              self.bodyCharsOrig = initChars 
1645              self.bodyChars = set(initChars) 
1646   
1647          self.maxSpecified = max > 0 
1648   
1649          if min < 1: 
1650              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 
1651   
1652          self.minLen = min 
1653   
1654          if max > 0: 
1655              self.maxLen = max 
1656          else: 
1657              self.maxLen = _MAX_INT 
1658   
1659          if exact > 0: 
1660              self.maxLen = exact 
1661              self.minLen = exact 
1662   
1663          self.name = _ustr(self) 
1664          self.errmsg = "Expected " + self.name 
1665          self.mayIndexError = False 
1666          self.asKeyword = asKeyword 
1667   
1668          if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 
1669              if self.bodyCharsOrig == self.initCharsOrig: 
1670                  self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 
1671              elif len(self.bodyCharsOrig) == 1: 
1672                  self.reString = "%s[%s]*" % \ 
1673                                        (re.escape(self.initCharsOrig), 
1674                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1675              else: 
1676                  self.reString = "[%s][%s]*" % \ 
1677                                        (_escapeRegexRangeChars(self.initCharsOrig), 
1678                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1679              if self.asKeyword: 
1680                  self.reString = r"\b"+self.reString+r"\b" 
1681              try: 
1682                  self.re = re.compile( self.reString ) 
1683              except: 
1684                  self.re = None 
 1685   
1686 -    def parseImpl( self, instring, loc, doActions=True ): 
 1687          if self.re: 
1688              result = self.re.match(instring,loc) 
1689              if not result: 
1690                  exc = self.myException 
1691                  exc.loc = loc 
1692                  exc.pstr = instring 
1693                  raise exc 
1694   
1695              loc = result.end() 
1696              return loc, result.group() 
1697   
1698          if not(instring[ loc ] in self.initChars): 
1699               
1700              exc = self.myException 
1701              exc.loc = loc 
1702              exc.pstr = instring 
1703              raise exc 
1704          start = loc 
1705          loc += 1 
1706          instrlen = len(instring) 
1707          bodychars = self.bodyChars 
1708          maxloc = start + self.maxLen 
1709          maxloc = min( maxloc, instrlen ) 
1710          while loc < maxloc and instring[loc] in bodychars: 
1711              loc += 1 
1712   
1713          throwException = False 
1714          if loc - start < self.minLen: 
1715              throwException = True 
1716          if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 
1717              throwException = True 
1718          if self.asKeyword: 
1719              if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 
1720                  throwException = True 
1721   
1722          if throwException: 
1723               
1724              exc = self.myException 
1725              exc.loc = loc 
1726              exc.pstr = instring 
1727              raise exc 
1728   
1729          return loc, instring[start:loc] 
 1730   
1732          try: 
1733              return super(Word,self).__str__() 
1734          except: 
1735              pass 
1736   
1737   
1738          if self.strRepr is None: 
1739   
1740              def charsAsStr(s): 
1741                  if len(s)>4: 
1742                      return s[:4]+"..." 
1743                  else: 
1744                      return s 
 1745   
1746              if ( self.initCharsOrig != self.bodyCharsOrig ): 
1747                  self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) 
1748              else: 
1749                  self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 
1750   
1751          return self.strRepr 
 1752   
1753   
1755      """Token for matching strings that match a given regular expression. 
1756         Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. 
1757      """ 
1758      compiledREtype = type(re.compile("[A-Z]")) 
1759 -    def __init__( self, pattern, flags=0): 
 1760          """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" 
1761          super(Regex,self).__init__() 
1762   
1763          if isinstance(pattern, basestring): 
1764              if len(pattern) == 0: 
1765                  warnings.warn("null string passed to Regex; use Empty() instead", 
1766                          SyntaxWarning, stacklevel=2) 
1767   
1768              self.pattern = pattern 
1769              self.flags = flags 
1770   
1771              try: 
1772                  self.re = re.compile(self.pattern, self.flags) 
1773                  self.reString = self.pattern 
1774              except sre_constants.error: 
1775                  warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
1776                      SyntaxWarning, stacklevel=2) 
1777                  raise 
1778   
1779          elif isinstance(pattern, Regex.compiledREtype): 
1780              self.re = pattern 
1781              self.pattern = \ 
1782              self.reString = str(pattern) 
1783              self.flags = flags 
1784               
1785          else: 
1786              raise ValueError("Regex may only be constructed with a string or a compiled RE object") 
1787   
1788          self.name = _ustr(self) 
1789          self.errmsg = "Expected " + self.name 
1790          self.mayIndexError = False 
1791          self.mayReturnEmpty = True 
 1792   
1793 -    def parseImpl( self, instring, loc, doActions=True ): 
 1794          result = self.re.match(instring,loc) 
1795          if not result: 
1796              exc = self.myException 
1797              exc.loc = loc 
1798              exc.pstr = instring 
1799              raise exc 
1800   
1801          loc = result.end() 
1802          d = result.groupdict() 
1803          ret = ParseResults(result.group()) 
1804          if d: 
1805              for k in d: 
1806                  ret[k] = d[k] 
1807          return loc,ret 
 1808   
1810          try: 
1811              return super(Regex,self).__str__() 
1812          except: 
1813              pass 
1814   
1815          if self.strRepr is None: 
1816              self.strRepr = "Re:(%s)" % repr(self.pattern) 
1817   
1818          return self.strRepr 
  1819   
1820   
1822      """Token for matching strings that are delimited by quoting characters. 
1823      """ 
1824 -    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): 
 1825          """ 
1826             Defined with the following parameters: 
1827              - quoteChar - string of one or more characters defining the quote delimiting string 
1828              - escChar - character to escape quotes, typically backslash (default=None) 
1829              - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 
1830              - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) 
1831              - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) 
1832              - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) 
1833          """ 
1834          super(QuotedString,self).__init__() 
1835   
1836           
1837          quoteChar = quoteChar.strip() 
1838          if len(quoteChar) == 0: 
1839              warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 
1840              raise SyntaxError() 
1841   
1842          if endQuoteChar is None: 
1843              endQuoteChar = quoteChar 
1844          else: 
1845              endQuoteChar = endQuoteChar.strip() 
1846              if len(endQuoteChar) == 0: 
1847                  warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 
1848                  raise SyntaxError() 
1849   
1850          self.quoteChar = quoteChar 
1851          self.quoteCharLen = len(quoteChar) 
1852          self.firstQuoteChar = quoteChar[0] 
1853          self.endQuoteChar = endQuoteChar 
1854          self.endQuoteCharLen = len(endQuoteChar) 
1855          self.escChar = escChar 
1856          self.escQuote = escQuote 
1857          self.unquoteResults = unquoteResults 
1858   
1859          if multiline: 
1860              self.flags = re.MULTILINE | re.DOTALL 
1861              self.pattern = r'%s(?:[^%s%s]' % \ 
1862                  ( re.escape(self.quoteChar), 
1863                    _escapeRegexRangeChars(self.endQuoteChar[0]), 
1864                    (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 
1865          else: 
1866              self.flags = 0 
1867              self.pattern = r'%s(?:[^%s\n\r%s]' % \ 
1868                  ( re.escape(self.quoteChar), 
1869                    _escapeRegexRangeChars(self.endQuoteChar[0]), 
1870                    (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 
1871          if len(self.endQuoteChar) > 1: 
1872              self.pattern += ( 
1873                  '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 
1874                                                 _escapeRegexRangeChars(self.endQuoteChar[i])) 
1875                                      for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' 
1876                  ) 
1877          if escQuote: 
1878              self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 
1879          if escChar: 
1880              self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 
1881              charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-') 
1882              self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset) 
1883          self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 
1884   
1885          try: 
1886              self.re = re.compile(self.pattern, self.flags) 
1887              self.reString = self.pattern 
1888          except sre_constants.error: 
1889              warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
1890                  SyntaxWarning, stacklevel=2) 
1891              raise 
1892   
1893          self.name = _ustr(self) 
1894          self.errmsg = "Expected " + self.name 
1895          self.mayIndexError = False 
1896          self.mayReturnEmpty = True 
 1897   
1898 -    def parseImpl( self, instring, loc, doActions=True ): 
 1899          result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 
1900          if not result: 
1901              exc = self.myException 
1902              exc.loc = loc 
1903              exc.pstr = instring 
1904              raise exc 
1905   
1906          loc = result.end() 
1907          ret = result.group() 
1908   
1909          if self.unquoteResults: 
1910   
1911               
1912              ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 
1913   
1914              if isinstance(ret,basestring): 
1915                   
1916                  if self.escChar: 
1917                      ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) 
1918   
1919                   
1920                  if self.escQuote: 
1921                      ret = ret.replace(self.escQuote, self.endQuoteChar) 
1922   
1923          return loc, ret 
 1924   
1926          try: 
1927              return super(QuotedString,self).__str__() 
1928          except: 
1929              pass 
1930   
1931          if self.strRepr is None: 
1932              self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 
1933   
1934          return self.strRepr 
  1935   
1936   
1938      """Token for matching words composed of characters *not* in a given set. 
1939         Defined with string containing all disallowed characters, and an optional 
1940         minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a 
1941         minimum value < 1 is not valid); the default values for C{max} and C{exact} 
1942         are 0, meaning no maximum or exact length restriction. 
1943      """ 
1944 -    def __init__( self, notChars, min=1, max=0, exact=0 ): 
 1945          super(CharsNotIn,self).__init__() 
1946          self.skipWhitespace = False 
1947          self.notChars = notChars 
1948   
1949          if min < 1: 
1950              raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 
1951   
1952          self.minLen = min 
1953   
1954          if max > 0: 
1955              self.maxLen = max 
1956          else: 
1957              self.maxLen = _MAX_INT 
1958   
1959          if exact > 0: 
1960              self.maxLen = exact 
1961              self.minLen = exact 
1962   
1963          self.name = _ustr(self) 
1964          self.errmsg = "Expected " + self.name 
1965          self.mayReturnEmpty = ( self.minLen == 0 ) 
1966          self.mayIndexError = False 
 1967   
1968 -    def parseImpl( self, instring, loc, doActions=True ): 
 1969          if instring[loc] in self.notChars: 
1970               
1971              exc = self.myException 
1972              exc.loc = loc 
1973              exc.pstr = instring 
1974              raise exc 
1975   
1976          start = loc 
1977          loc += 1 
1978          notchars = self.notChars 
1979          maxlen = min( start+self.maxLen, len(instring) ) 
1980          while loc < maxlen and \ 
1981                (instring[loc] not in notchars): 
1982              loc += 1 
1983   
1984          if loc - start < self.minLen: 
1985               
1986              exc = self.myException 
1987              exc.loc = loc 
1988              exc.pstr = instring 
1989              raise exc 
1990   
1991          return loc, instring[start:loc] 
 1992   
1994          try: 
1995              return super(CharsNotIn, self).__str__() 
1996          except: 
1997              pass 
1998   
1999          if self.strRepr is None: 
2000              if len(self.notChars) > 4: 
2001                  self.strRepr = "!W:(%s...)" % self.notChars[:4] 
2002              else: 
2003                  self.strRepr = "!W:(%s)" % self.notChars 
2004   
2005          return self.strRepr 
  2006   
2008      """Special matching class for matching whitespace.  Normally, whitespace is ignored 
2009         by pyparsing grammars.  This class is included when some whitespace structures 
2010         are significant.  Define with a string containing the whitespace characters to be 
2011         matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments, 
2012         as defined for the C{L{Word}} class.""" 
2013      whiteStrs = { 
2014          " " : "<SPC>", 
2015          "\t": "<TAB>", 
2016          "\n": "<LF>", 
2017          "\r": "<CR>", 
2018          "\f": "<FF>", 
2019          } 
2020 -    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 
 2021          super(White,self).__init__() 
2022          self.matchWhite = ws 
2023          self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) 
2024           
2025          self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) 
2026          self.mayReturnEmpty = True 
2027          self.errmsg = "Expected " + self.name 
2028   
2029          self.minLen = min 
2030   
2031          if max > 0: 
2032              self.maxLen = max 
2033          else: 
2034              self.maxLen = _MAX_INT 
2035   
2036          if exact > 0: 
2037              self.maxLen = exact 
2038              self.minLen = exact 
 2039   
2040 -    def parseImpl( self, instring, loc, doActions=True ): 
 2041          if not(instring[ loc ] in self.matchWhite): 
2042               
2043              exc = self.myException 
2044              exc.loc = loc 
2045              exc.pstr = instring 
2046              raise exc 
2047          start = loc 
2048          loc += 1 
2049          maxloc = start + self.maxLen 
2050          maxloc = min( maxloc, len(instring) ) 
2051          while loc < maxloc and instring[loc] in self.matchWhite: 
2052              loc += 1 
2053   
2054          if loc - start < self.minLen: 
2055               
2056              exc = self.myException 
2057              exc.loc = loc 
2058              exc.pstr = instring 
2059              raise exc 
2060   
2061          return loc, instring[start:loc] 
  2062   
2063   
2066          super(_PositionToken,self).__init__() 
2067          self.name=self.__class__.__name__ 
2068          self.mayReturnEmpty = True 
2069          self.mayIndexError = False 
 2072      """Token to advance to a specific column of input text; useful for tabular report scraping.""" 
2076   
2078          if col(loc,instring) != self.col: 
2079              instrlen = len(instring) 
2080              if self.ignoreExprs: 
2081                  loc = self._skipIgnorables( instring, loc ) 
2082              while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : 
2083                  loc += 1 
2084          return loc 
 2085   
2086 -    def parseImpl( self, instring, loc, doActions=True ): 
 2087          thiscol = col( loc, instring ) 
2088          if thiscol > self.col: 
2089              raise ParseException( instring, loc, "Text not in expected column", self ) 
2090          newloc = loc + self.col - thiscol 
2091          ret = instring[ loc: newloc ] 
2092          return newloc, ret 
  2093   
2095      """Matches if current position is at the beginning of a line within the parse string""" 
2100   
2102          preloc = super(LineStart,self).preParse(instring,loc) 
2103          if instring[preloc] == "\n": 
2104              loc += 1 
2105          return loc 
 2106   
2107 -    def parseImpl( self, instring, loc, doActions=True ): 
 2108          if not( loc==0 or 
2109              (loc == self.preParse( instring, 0 )) or 
2110              (instring[loc-1] == "\n") ):  
2111               
2112              exc = self.myException 
2113              exc.loc = loc 
2114              exc.pstr = instring 
2115              raise exc 
2116          return loc, [] 
  2117   
2119      """Matches if current position is at the end of a line within the parse string""" 
2124   
2125 -    def parseImpl( self, instring, loc, doActions=True ): 
 2126          if loc<len(instring): 
2127              if instring[loc] == "\n": 
2128                  return loc+1, "\n" 
2129              else: 
2130                   
2131                  exc = self.myException 
2132                  exc.loc = loc 
2133                  exc.pstr = instring 
2134                  raise exc 
2135          elif loc == len(instring): 
2136              return loc+1, [] 
2137          else: 
2138              exc = self.myException 
2139              exc.loc = loc 
2140              exc.pstr = instring 
2141              raise exc 
  2142   
2144      """Matches if current position is at the beginning of the parse string""" 
2148   
2149 -    def parseImpl( self, instring, loc, doActions=True ): 
 2150          if loc != 0: 
2151               
2152              if loc != self.preParse( instring, 0 ): 
2153                   
2154                  exc = self.myException 
2155                  exc.loc = loc 
2156                  exc.pstr = instring 
2157                  raise exc 
2158          return loc, [] 
  2159   
2161      """Matches if current position is at the end of the parse string""" 
2165   
2166 -    def parseImpl( self, instring, loc, doActions=True ): 
 2167          if loc < len(instring): 
2168               
2169              exc = self.myException 
2170              exc.loc = loc 
2171              exc.pstr = instring 
2172              raise exc 
2173          elif loc == len(instring): 
2174              return loc+1, [] 
2175          elif loc > len(instring): 
2176              return loc, [] 
2177          else: 
2178              exc = self.myException 
2179              exc.loc = loc 
2180              exc.pstr = instring 
2181              raise exc 
  2182   
2184      """Matches if the current position is at the beginning of a Word, and 
2185         is not preceded by any character in a given set of C{wordChars} 
2186         (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 
2187         use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of 
2188         the string being parsed, or at the beginning of a line. 
2189      """ 
2191          super(WordStart,self).__init__() 
2192          self.wordChars = set(wordChars) 
2193          self.errmsg = "Not at the start of a word" 
 2194   
2195 -    def parseImpl(self, instring, loc, doActions=True ): 
 2196          if loc != 0: 
2197              if (instring[loc-1] in self.wordChars or 
2198                  instring[loc] not in self.wordChars): 
2199                  exc = self.myException 
2200                  exc.loc = loc 
2201                  exc.pstr = instring 
2202                  raise exc 
2203          return loc, [] 
  2204   
2206      """Matches if the current position is at the end of a Word, and 
2207         is not followed by any character in a given set of C{wordChars} 
2208         (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 
2209         use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of 
2210         the string being parsed, or at the end of a line. 
2211      """ 
2213          super(WordEnd,self).__init__() 
2214          self.wordChars = set(wordChars) 
2215          self.skipWhitespace = False 
2216          self.errmsg = "Not at the end of a word" 
 2217   
2218 -    def parseImpl(self, instring, loc, doActions=True ): 
 2219          instrlen = len(instring) 
2220          if instrlen>0 and loc<instrlen: 
2221              if (instring[loc] in self.wordChars or 
2222                  instring[loc-1] not in self.wordChars): 
2223                   
2224                  exc = self.myException 
2225                  exc.loc = loc 
2226                  exc.pstr = instring 
2227                  raise exc 
2228          return loc, [] 
  2229   
2230   
2232      """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 
2233 -    def __init__( self, exprs, savelist = False ): 
 2234          super(ParseExpression,self).__init__(savelist) 
2235          if isinstance( exprs, list ): 
2236              self.exprs = exprs 
2237          elif isinstance( exprs, basestring ): 
2238              self.exprs = [ Literal( exprs ) ] 
2239          else: 
2240              try: 
2241                  self.exprs = list( exprs ) 
2242              except TypeError: 
2243                  self.exprs = [ exprs ] 
2244          self.callPreparse = False 
 2245   
2247          return self.exprs[i] 
 2248   
2250          self.exprs.append( other ) 
2251          self.strRepr = None 
2252          return self 
 2253   
2255          """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on 
2256             all contained expressions.""" 
2257          self.skipWhitespace = False 
2258          self.exprs = [ e.copy() for e in self.exprs ] 
2259          for e in self.exprs: 
2260              e.leaveWhitespace() 
2261          return self 
 2262   
2264          if isinstance( other, Suppress ): 
2265              if other not in self.ignoreExprs: 
2266                  super( ParseExpression, self).ignore( other ) 
2267                  for e in self.exprs: 
2268                      e.ignore( self.ignoreExprs[-1] ) 
2269          else: 
2270              super( ParseExpression, self).ignore( other ) 
2271              for e in self.exprs: 
2272                  e.ignore( self.ignoreExprs[-1] ) 
2273          return self 
 2274   
2276          try: 
2277              return super(ParseExpression,self).__str__() 
2278          except: 
2279              pass 
2280   
2281          if self.strRepr is None: 
2282              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 
2283          return self.strRepr 
 2284   
2286          super(ParseExpression,self).streamline() 
2287   
2288          for e in self.exprs: 
2289              e.streamline() 
2290   
2291           
2292           
2293           
2294          if ( len(self.exprs) == 2 ): 
2295              other = self.exprs[0] 
2296              if ( isinstance( other, self.__class__ ) and 
2297                    not(other.parseAction) and 
2298                    other.resultsName is None and 
2299                    not other.debug ): 
2300                  self.exprs = other.exprs[:] + [ self.exprs[1] ] 
2301                  self.strRepr = None 
2302                  self.mayReturnEmpty |= other.mayReturnEmpty 
2303                  self.mayIndexError  |= other.mayIndexError 
2304   
2305              other = self.exprs[-1] 
2306              if ( isinstance( other, self.__class__ ) and 
2307                    not(other.parseAction) and 
2308                    other.resultsName is None and 
2309                    not other.debug ): 
2310                  self.exprs = self.exprs[:-1] + other.exprs[:] 
2311                  self.strRepr = None 
2312                  self.mayReturnEmpty |= other.mayReturnEmpty 
2313                  self.mayIndexError  |= other.mayIndexError 
2314   
2315          return self 
 2316   
2320   
2321 -    def validate( self, validateTrace=[] ): 
 2322          tmp = validateTrace[:]+[self] 
2323          for e in self.exprs: 
2324              e.validate(tmp) 
2325          self.checkRecursion( [] ) 
 2326           
 2331   
2332 -class And(ParseExpression): 
 2333      """Requires all given C{ParseExpression}s to be found in the given order. 
2334         Expressions may be separated by whitespace. 
2335         May be constructed using the C{'+'} operator. 
2336      """ 
2337   
2342   
2343 -    def __init__( self, exprs, savelist = True ): 
 2344          super(And,self).__init__(exprs, savelist) 
2345          self.mayReturnEmpty = True 
2346          for e in self.exprs: 
2347              if not e.mayReturnEmpty: 
2348                  self.mayReturnEmpty = False 
2349                  break 
2350          self.setWhitespaceChars( exprs[0].whiteChars ) 
2351          self.skipWhitespace = exprs[0].skipWhitespace 
2352          self.callPreparse = True 
 2353   
2354 -    def parseImpl( self, instring, loc, doActions=True ): 
 2355           
2356           
2357          loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 
2358          errorStop = False 
2359          for e in self.exprs[1:]: 
2360              if isinstance(e, And._ErrorStop): 
2361                  errorStop = True 
2362                  continue 
2363              if errorStop: 
2364                  try: 
2365                      loc, exprtokens = e._parse( instring, loc, doActions ) 
2366                  except ParseSyntaxException: 
2367                      raise 
2368                  except ParseBaseException: 
2369                      pe = sys.exc_info()[1] 
2370                      raise ParseSyntaxException(pe) 
2371                  except IndexError: 
2372                      raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 
2373              else: 
2374                  loc, exprtokens = e._parse( instring, loc, doActions ) 
2375              if exprtokens or exprtokens.keys(): 
2376                  resultlist += exprtokens 
2377          return loc, resultlist 
 2378   
2380          if isinstance( other, basestring ): 
2381              other = Literal( other ) 
2382          return self.append( other )  
 2383   
2385          subRecCheckList = parseElementList[:] + [ self ] 
2386          for e in self.exprs: 
2387              e.checkRecursion( subRecCheckList ) 
2388              if not e.mayReturnEmpty: 
2389                  break 
 2390   
2392          if hasattr(self,"name"): 
2393              return self.name 
2394   
2395          if self.strRepr is None: 
2396              self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2397   
2398          return self.strRepr 
  2399   
2400   
2401 -class Or(ParseExpression): 
 2402      """Requires that at least one C{ParseExpression} is found. 
2403         If two expressions match, the expression that matches the longest string will be used. 
2404         May be constructed using the C{'^'} operator. 
2405      """ 
2406 -    def __init__( self, exprs, savelist = False ): 
 2407          super(Or,self).__init__(exprs, savelist) 
2408          self.mayReturnEmpty = False 
2409          for e in self.exprs: 
2410              if e.mayReturnEmpty: 
2411                  self.mayReturnEmpty = True 
2412                  break 
 2413   
2414 -    def parseImpl( self, instring, loc, doActions=True ): 
 2415          maxExcLoc = -1 
2416          maxMatchLoc = -1 
2417          maxException = None 
2418          for e in self.exprs: 
2419              try: 
2420                  loc2 = e.tryParse( instring, loc ) 
2421              except ParseException: 
2422                  err = sys.exc_info()[1] 
2423                  if err.loc > maxExcLoc: 
2424                      maxException = err 
2425                      maxExcLoc = err.loc 
2426              except IndexError: 
2427                  if len(instring) > maxExcLoc: 
2428                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
2429                      maxExcLoc = len(instring) 
2430              else: 
2431                  if loc2 > maxMatchLoc: 
2432                      maxMatchLoc = loc2 
2433                      maxMatchExp = e 
2434   
2435          if maxMatchLoc < 0: 
2436              if maxException is not None: 
2437                  raise maxException 
2438              else: 
2439                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
2440   
2441          return maxMatchExp._parse( instring, loc, doActions ) 
 2442   
2444          if isinstance( other, basestring ): 
2445              other = ParserElement.literalStringClass( other ) 
2446          return self.append( other )  
 2447   
2449          if hasattr(self,"name"): 
2450              return self.name 
2451   
2452          if self.strRepr is None: 
2453              self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2454   
2455          return self.strRepr 
 2456   
2458          subRecCheckList = parseElementList[:] + [ self ] 
2459          for e in self.exprs: 
2460              e.checkRecursion( subRecCheckList ) 
  2461   
2462   
2464      """Requires that at least one C{ParseExpression} is found. 
2465         If two expressions match, the first one listed is the one that will match. 
2466         May be constructed using the C{'|'} operator. 
2467      """ 
2468 -    def __init__( self, exprs, savelist = False ): 
 2469          super(MatchFirst,self).__init__(exprs, savelist) 
2470          if exprs: 
2471              self.mayReturnEmpty = False 
2472              for e in self.exprs: 
2473                  if e.mayReturnEmpty: 
2474                      self.mayReturnEmpty = True 
2475                      break 
2476          else: 
2477              self.mayReturnEmpty = True 
 2478   
2479 -    def parseImpl( self, instring, loc, doActions=True ): 
 2480          maxExcLoc = -1 
2481          maxException = None 
2482          for e in self.exprs: 
2483              try: 
2484                  ret = e._parse( instring, loc, doActions ) 
2485                  return ret 
2486              except ParseException, err: 
2487                  if err.loc > maxExcLoc: 
2488                      maxException = err 
2489                      maxExcLoc = err.loc 
2490              except IndexError: 
2491                  if len(instring) > maxExcLoc: 
2492                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
2493                      maxExcLoc = len(instring) 
2494   
2495           
2496          else: 
2497              if maxException is not None: 
2498                  raise maxException 
2499              else: 
2500                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
 2501   
2503          if isinstance( other, basestring ): 
2504              other = ParserElement.literalStringClass( other ) 
2505          return self.append( other )  
 2506   
2508          if hasattr(self,"name"): 
2509              return self.name 
2510   
2511          if self.strRepr is None: 
2512              self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2513   
2514          return self.strRepr 
 2515   
2517          subRecCheckList = parseElementList[:] + [ self ] 
2518          for e in self.exprs: 
2519              e.checkRecursion( subRecCheckList ) 
  2520   
2521   
2522 -class Each(ParseExpression): 
 2523      """Requires all given C{ParseExpression}s to be found, but in any order. 
2524         Expressions may be separated by whitespace. 
2525         May be constructed using the C{'&'} operator. 
2526      """ 
2527 -    def __init__( self, exprs, savelist = True ): 
 2528          super(Each,self).__init__(exprs, savelist) 
2529          self.mayReturnEmpty = True 
2530          for e in self.exprs: 
2531              if not e.mayReturnEmpty: 
2532                  self.mayReturnEmpty = False 
2533                  break 
2534          self.skipWhitespace = True 
2535          self.initExprGroups = True 
 2536   
2537 -    def parseImpl( self, instring, loc, doActions=True ): 
 2538          if self.initExprGroups: 
2539              opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 
2540              opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ] 
2541              self.optionals = opt1 + opt2 
2542              self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 
2543              self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 
2544              self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 
2545              self.required += self.multirequired 
2546              self.initExprGroups = False 
2547          tmpLoc = loc 
2548          tmpReqd = self.required[:] 
2549          tmpOpt  = self.optionals[:] 
2550          matchOrder = [] 
2551   
2552          keepMatching = True 
2553          while keepMatching: 
2554              tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 
2555              failed = [] 
2556              for e in tmpExprs: 
2557                  try: 
2558                      tmpLoc = e.tryParse( instring, tmpLoc ) 
2559                  except ParseException: 
2560                      failed.append(e) 
2561                  else: 
2562                      matchOrder.append(e) 
2563                      if e in tmpReqd: 
2564                          tmpReqd.remove(e) 
2565                      elif e in tmpOpt: 
2566                          tmpOpt.remove(e) 
2567              if len(failed) == len(tmpExprs): 
2568                  keepMatching = False 
2569   
2570          if tmpReqd: 
2571              missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) 
2572              raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 
2573   
2574           
2575          matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 
2576   
2577          resultlist = [] 
2578          for e in matchOrder: 
2579              loc,results = e._parse(instring,loc,doActions) 
2580              resultlist.append(results) 
2581   
2582          finalResults = ParseResults([]) 
2583          for r in resultlist: 
2584              dups = {} 
2585              for k in r.keys(): 
2586                  if k in finalResults.keys(): 
2587                      tmp = ParseResults(finalResults[k]) 
2588                      tmp += ParseResults(r[k]) 
2589                      dups[k] = tmp 
2590              finalResults += ParseResults(r) 
2591              for k,v in dups.items(): 
2592                  finalResults[k] = v 
2593          return loc, finalResults 
 2594   
2596          if hasattr(self,"name"): 
2597              return self.name 
2598   
2599          if self.strRepr is None: 
2600              self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2601   
2602          return self.strRepr 
 2603   
2605          subRecCheckList = parseElementList[:] + [ self ] 
2606          for e in self.exprs: 
2607              e.checkRecursion( subRecCheckList ) 
  2608   
2609   
2611      """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" 
2612 -    def __init__( self, expr, savelist=False ): 
 2613          super(ParseElementEnhance,self).__init__(savelist) 
2614          if isinstance( expr, basestring ): 
2615              expr = Literal(expr) 
2616          self.expr = expr 
2617          self.strRepr = None 
2618          if expr is not None: 
2619              self.mayIndexError = expr.mayIndexError 
2620              self.mayReturnEmpty = expr.mayReturnEmpty 
2621              self.setWhitespaceChars( expr.whiteChars ) 
2622              self.skipWhitespace = expr.skipWhitespace 
2623              self.saveAsList = expr.saveAsList 
2624              self.callPreparse = expr.callPreparse 
2625              self.ignoreExprs.extend(expr.ignoreExprs) 
 2626   
2627 -    def parseImpl( self, instring, loc, doActions=True ): 
 2628          if self.expr is not None: 
2629              return self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2630          else: 
2631              raise ParseException("",loc,self.errmsg,self) 
 2632   
2634          self.skipWhitespace = False 
2635          self.expr = self.expr.copy() 
2636          if self.expr is not None: 
2637              self.expr.leaveWhitespace() 
2638          return self 
 2639   
2641          if isinstance( other, Suppress ): 
2642              if other not in self.ignoreExprs: 
2643                  super( ParseElementEnhance, self).ignore( other ) 
2644                  if self.expr is not None: 
2645                      self.expr.ignore( self.ignoreExprs[-1] ) 
2646          else: 
2647              super( ParseElementEnhance, self).ignore( other ) 
2648              if self.expr is not None: 
2649                  self.expr.ignore( self.ignoreExprs[-1] ) 
2650          return self 
 2651   
2657   
2659          if self in parseElementList: 
2660              raise RecursiveGrammarException( parseElementList+[self] ) 
2661          subRecCheckList = parseElementList[:] + [ self ] 
2662          if self.expr is not None: 
2663              self.expr.checkRecursion( subRecCheckList ) 
 2664   
2665 -    def validate( self, validateTrace=[] ): 
 2666          tmp = validateTrace[:]+[self] 
2667          if self.expr is not None: 
2668              self.expr.validate(tmp) 
2669          self.checkRecursion( [] ) 
 2670   
2672          try: 
2673              return super(ParseElementEnhance,self).__str__() 
2674          except: 
2675              pass 
2676   
2677          if self.strRepr is None and self.expr is not None: 
2678              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) 
2679          return self.strRepr 
  2680   
2681   
2683      """Lookahead matching of the given parse expression.  C{FollowedBy} 
2684      does *not* advance the parsing position within the input string, it only 
2685      verifies that the specified parse expression matches at the current 
2686      position.  C{FollowedBy} always returns a null token list.""" 
2690   
2691 -    def parseImpl( self, instring, loc, doActions=True ): 
 2692          self.expr.tryParse( instring, loc ) 
2693          return loc, [] 
  2694   
2695   
2696 -class NotAny(ParseElementEnhance): 
 2697      """Lookahead to disallow matching with the given parse expression.  C{NotAny} 
2698      does *not* advance the parsing position within the input string, it only 
2699      verifies that the specified parse expression does *not* match at the current 
2700      position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} 
2701      always returns a null token list.  May be constructed using the '~' operator.""" 
2703          super(NotAny,self).__init__(expr) 
2704           
2705          self.skipWhitespace = False   
2706          self.mayReturnEmpty = True 
2707          self.errmsg = "Found unwanted token, "+_ustr(self.expr) 
 2708   
2709 -    def parseImpl( self, instring, loc, doActions=True ): 
 2710          try: 
2711              self.expr.tryParse( instring, loc ) 
2712          except (ParseException,IndexError): 
2713              pass 
2714          else: 
2715               
2716              exc = self.myException 
2717              exc.loc = loc 
2718              exc.pstr = instring 
2719              raise exc 
2720          return loc, [] 
 2721   
2723          if hasattr(self,"name"): 
2724              return self.name 
2725   
2726          if self.strRepr is None: 
2727              self.strRepr = "~{" + _ustr(self.expr) + "}" 
2728   
2729          return self.strRepr 
  2730   
2731   
2733      """Optional repetition of zero or more of the given expression.""" 
2737   
2738 -    def parseImpl( self, instring, loc, doActions=True ): 
 2739          tokens = [] 
2740          try: 
2741              loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2742              hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 
2743              while 1: 
2744                  if hasIgnoreExprs: 
2745                      preloc = self._skipIgnorables( instring, loc ) 
2746                  else: 
2747                      preloc = loc 
2748                  loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 
2749                  if tmptokens or tmptokens.keys(): 
2750                      tokens += tmptokens 
2751          except (ParseException,IndexError): 
2752              pass 
2753   
2754          return loc, tokens 
 2755   
2757          if hasattr(self,"name"): 
2758              return self.name 
2759   
2760          if self.strRepr is None: 
2761              self.strRepr = "[" + _ustr(self.expr) + "]..." 
2762   
2763          return self.strRepr 
 2764   
 2769   
2770   
2772      """Repetition of one or more of the given expression.""" 
2773 -    def parseImpl( self, instring, loc, doActions=True ): 
 2774           
2775          loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2776          try: 
2777              hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 
2778              while 1: 
2779                  if hasIgnoreExprs: 
2780                      preloc = self._skipIgnorables( instring, loc ) 
2781                  else: 
2782                      preloc = loc 
2783                  loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 
2784                  if tmptokens or tmptokens.keys(): 
2785                      tokens += tmptokens 
2786          except (ParseException,IndexError): 
2787              pass 
2788   
2789          return loc, tokens 
 2790   
2792          if hasattr(self,"name"): 
2793              return self.name 
2794   
2795          if self.strRepr is None: 
2796              self.strRepr = "{" + _ustr(self.expr) + "}..." 
2797   
2798          return self.strRepr 
 2799   
 2804   
2811   
2812  _optionalNotMatched = _NullToken() 
2814      """Optional matching of the given expression. 
2815         A default return string can also be specified, if the optional expression 
2816         is not found. 
2817      """ 
2819          super(Optional,self).__init__( exprs, savelist=False ) 
2820          self.defaultValue = default 
2821          self.mayReturnEmpty = True 
 2822   
2823 -    def parseImpl( self, instring, loc, doActions=True ): 
 2824          try: 
2825              loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2826          except (ParseException,IndexError): 
2827              if self.defaultValue is not _optionalNotMatched: 
2828                  if self.expr.resultsName: 
2829                      tokens = ParseResults([ self.defaultValue ]) 
2830                      tokens[self.expr.resultsName] = self.defaultValue 
2831                  else: 
2832                      tokens = [ self.defaultValue ] 
2833              else: 
2834                  tokens = [] 
2835          return loc, tokens 
 2836   
2838          if hasattr(self,"name"): 
2839              return self.name 
2840   
2841          if self.strRepr is None: 
2842              self.strRepr = "[" + _ustr(self.expr) + "]" 
2843   
2844          return self.strRepr 
  2845   
2846   
2847 -class SkipTo(ParseElementEnhance): 
 2848      """Token for skipping over all undefined text until the matched expression is found. 
2849         If C{include} is set to true, the matched expression is also parsed (the skipped text 
2850         and matched expression are returned as a 2-element list).  The C{ignore} 
2851         argument is used to define grammars (typically quoted strings and comments) that 
2852         might contain false matches. 
2853      """ 
2854 -    def __init__( self, other, include=False, ignore=None, failOn=None ): 
 2855          super( SkipTo, self ).__init__( other ) 
2856          self.ignoreExpr = ignore 
2857          self.mayReturnEmpty = True 
2858          self.mayIndexError = False 
2859          self.includeMatch = include 
2860          self.asList = False 
2861          if failOn is not None and isinstance(failOn, basestring): 
2862              self.failOn = Literal(failOn) 
2863          else: 
2864              self.failOn = failOn 
2865          self.errmsg = "No match found for "+_ustr(self.expr) 
 2866   
2867 -    def parseImpl( self, instring, loc, doActions=True ): 
 2868          startLoc = loc 
2869          instrlen = len(instring) 
2870          expr = self.expr 
2871          failParse = False 
2872          while loc <= instrlen: 
2873              try: 
2874                  if self.failOn: 
2875                      try: 
2876                          self.failOn.tryParse(instring, loc) 
2877                      except ParseBaseException: 
2878                          pass 
2879                      else: 
2880                          failParse = True 
2881                          raise ParseException(instring, loc, "Found expression " + str(self.failOn)) 
2882                      failParse = False 
2883                  if self.ignoreExpr is not None: 
2884                      while 1: 
2885                          try: 
2886                              loc = self.ignoreExpr.tryParse(instring,loc) 
2887                               
2888                          except ParseBaseException: 
2889                              break 
2890                  expr._parse( instring, loc, doActions=False, callPreParse=False ) 
2891                  skipText = instring[startLoc:loc] 
2892                  if self.includeMatch: 
2893                      loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) 
2894                      if mat: 
2895                          skipRes = ParseResults( skipText ) 
2896                          skipRes += mat 
2897                          return loc, [ skipRes ] 
2898                      else: 
2899                          return loc, [ skipText ] 
2900                  else: 
2901                      return loc, [ skipText ] 
2902              except (ParseException,IndexError): 
2903                  if failParse: 
2904                      raise 
2905                  else: 
2906                      loc += 1 
2907          exc = self.myException 
2908          exc.loc = loc 
2909          exc.pstr = instring 
2910          raise exc 
  2911   
2912 -class Forward(ParseElementEnhance): 
 2913      """Forward declaration of an expression to be defined later - 
2914         used for recursive grammars, such as algebraic infix notation. 
2915         When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. 
2916   
2917         Note: take care when assigning to C{Forward} not to overlook precedence of operators. 
2918         Specifically, '|' has a lower precedence than '<<', so that:: 
2919            fwdExpr << a | b | c 
2920         will actually be evaluated as:: 
2921            (fwdExpr << a) | b | c 
2922         thereby leaving b and c out as parseable alternatives.  It is recommended that you 
2923         explicitly group the values inserted into the C{Forward}:: 
2924            fwdExpr << (a | b | c) 
2925         Converting to use the '<<=' operator instead will avoid this problem. 
2926      """ 
2929   
2931          if isinstance( other, basestring ): 
2932              other = ParserElement.literalStringClass(other) 
2933          self.expr = other 
2934          self.mayReturnEmpty = other.mayReturnEmpty 
2935          self.strRepr = None 
2936          self.mayIndexError = self.expr.mayIndexError 
2937          self.mayReturnEmpty = self.expr.mayReturnEmpty 
2938          self.setWhitespaceChars( self.expr.whiteChars ) 
2939          self.skipWhitespace = self.expr.skipWhitespace 
2940          self.saveAsList = self.expr.saveAsList 
2941          self.ignoreExprs.extend(self.expr.ignoreExprs) 
2942          return None 
 2943      __ilshift__ = __lshift__ 
2944       
2946          self.skipWhitespace = False 
2947          return self 
 2948   
2950          if not self.streamlined: 
2951              self.streamlined = True 
2952              if self.expr is not None: 
2953                  self.expr.streamline() 
2954          return self 
 2955   
2956 -    def validate( self, validateTrace=[] ): 
 2957          if self not in validateTrace: 
2958              tmp = validateTrace[:]+[self] 
2959              if self.expr is not None: 
2960                  self.expr.validate(tmp) 
2961          self.checkRecursion([]) 
 2962   
2964          if hasattr(self,"name"): 
2965              return self.name 
2966   
2967          self._revertClass = self.__class__ 
2968          self.__class__ = _ForwardNoRecurse 
2969          try: 
2970              if self.expr is not None: 
2971                  retString = _ustr(self.expr) 
2972              else: 
2973                  retString = "None" 
2974          finally: 
2975              self.__class__ = self._revertClass 
2976          return self.__class__.__name__ + ": " + retString 
 2977   
2979          if self.expr is not None: 
2980              return super(Forward,self).copy() 
2981          else: 
2982              ret = Forward() 
2983              ret << self 
2984              return ret 
  2985   
2989   
2991      """Abstract subclass of C{ParseExpression}, for converting parsed results.""" 
2992 -    def __init__( self, expr, savelist=False ): 
  2995   
2996 -class Upcase(TokenConverter): 
 2997      """Converter to upper case all matching tokens.""" 
2999          super(Upcase,self).__init__(*args) 
3000          warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
3001                         DeprecationWarning,stacklevel=2) 
 3002   
3003 -    def postParse( self, instring, loc, tokenlist ): 
 3004          return list(map( string.upper, tokenlist )) 
  3005   
3006   
3008      """Converter to concatenate all matching tokens to a single string. 
3009         By default, the matching patterns must also be contiguous in the input string; 
3010         this can be disabled by specifying C{'adjacent=False'} in the constructor. 
3011      """ 
3012 -    def __init__( self, expr, joinString="", adjacent=True ): 
 3013          super(Combine,self).__init__( expr ) 
3014           
3015          if adjacent: 
3016              self.leaveWhitespace() 
3017          self.adjacent = adjacent 
3018          self.skipWhitespace = True 
3019          self.joinString = joinString 
3020          self.callPreparse = True 
 3021   
3028   
3029 -    def postParse( self, instring, loc, tokenlist ): 
 3030          retToks = tokenlist.copy() 
3031          del retToks[:] 
3032          retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) 
3033   
3034          if self.resultsName and len(retToks.keys())>0: 
3035              return [ retToks ] 
3036          else: 
3037              return retToks 
  3038   
3039 -class Group(TokenConverter): 
 3040      """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" 
3042          super(Group,self).__init__( expr ) 
3043          self.saveAsList = True 
 3044   
3045 -    def postParse( self, instring, loc, tokenlist ): 
 3046          return [ tokenlist ] 
  3047   
3048 -class Dict(TokenConverter): 
 3049      """Converter to return a repetitive expression as a list, but also as a dictionary. 
3050         Each element can also be referenced using the first token in the expression as its key. 
3051         Useful for tabular report scraping when the first column can be used as a item key. 
3052      """ 
3054          super(Dict,self).__init__( exprs ) 
3055          self.saveAsList = True 
 3056   
3057 -    def postParse( self, instring, loc, tokenlist ): 
 3058          for i,tok in enumerate(tokenlist): 
3059              if len(tok) == 0: 
3060                  continue 
3061              ikey = tok[0] 
3062              if isinstance(ikey,int): 
3063                  ikey = _ustr(tok[0]).strip() 
3064              if len(tok)==1: 
3065                  tokenlist[ikey] = _ParseResultsWithOffset("",i) 
3066              elif len(tok)==2 and not isinstance(tok[1],ParseResults): 
3067                  tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) 
3068              else: 
3069                  dictvalue = tok.copy()  
3070                  del dictvalue[0] 
3071                  if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): 
3072                      tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) 
3073                  else: 
3074                      tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) 
3075   
3076          if self.resultsName: 
3077              return [ tokenlist ] 
3078          else: 
3079              return tokenlist 
  3080   
3081   
3083      """Converter for ignoring the results of a parsed expression.""" 
3084 -    def postParse( self, instring, loc, tokenlist ): 
 3086   
 3089   
3090   
3092      """Wrapper for parse actions, to ensure they are only called once.""" 
3094          self.callable = _trim_arity(methodCall) 
3095          self.called = False 
 3097          if not self.called: 
3098              results = self.callable(s,l,t) 
3099              self.called = True 
3100              return results 
3101          raise ParseException(s,l,"") 
  3104   
3106      """Decorator for debugging parse actions.""" 
3107      f = _trim_arity(f) 
3108      def z(*paArgs): 
3109          thisFunc = f.func_name 
3110          s,l,t = paArgs[-3:] 
3111          if len(paArgs)>3: 
3112              thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 
3113          sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) 
3114          try: 
3115              ret = f(*paArgs) 
3116          except Exception: 
3117              exc = sys.exc_info()[1] 
3118              sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) 
3119              raise 
3120          sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) 
3121          return ret 
 3122      try: 
3123          z.__name__ = f.__name__ 
3124      except AttributeError: 
3125          pass 
3126      return z 
3127   
3128   
3129   
3130   
3132      """Helper to define a delimited list of expressions - the delimiter defaults to ','. 
3133         By default, the list elements and delimiters can have intervening whitespace, and 
3134         comments, but this can be overridden by passing C{combine=True} in the constructor. 
3135         If C{combine} is set to C{True}, the matching tokens are returned as a single token 
3136         string, with the delimiters included; otherwise, the matching tokens are returned 
3137         as a list of tokens, with the delimiters suppressed. 
3138      """ 
3139      dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 
3140      if combine: 
3141          return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) 
3142      else: 
3143          return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) 
 3144   
3146      """Helper to define a counted list of expressions. 
3147         This helper defines a pattern of the form:: 
3148             integer expr expr expr... 
3149         where the leading integer tells how many expr expressions follow. 
3150         The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. 
3151      """ 
3152      arrayExpr = Forward() 
3153      def countFieldParseAction(s,l,t): 
3154          n = t[0] 
3155          arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 
3156          return [] 
 3157      if intExpr is None: 
3158          intExpr = Word(nums).setParseAction(lambda t:int(t[0])) 
3159      else: 
3160          intExpr = intExpr.copy() 
3161      intExpr.setName("arrayLen") 
3162      intExpr.addParseAction(countFieldParseAction, callDuringTry=True) 
3163      return ( intExpr + arrayExpr ) 
3164   
3166      ret = [] 
3167      for i in L: 
3168          if isinstance(i,list): 
3169              ret.extend(_flatten(i)) 
3170          else: 
3171              ret.append(i) 
3172      return ret 
 3173   
3175      """Helper to define an expression that is indirectly defined from 
3176         the tokens matched in a previous expression, that is, it looks 
3177         for a 'repeat' of a previous expression.  For example:: 
3178             first = Word(nums) 
3179             second = matchPreviousLiteral(first) 
3180             matchExpr = first + ":" + second 
3181         will match C{"1:1"}, but not C{"1:2"}.  Because this matches a 
3182         previous literal, will also match the leading C{"1:1"} in C{"1:10"}. 
3183         If this is not desired, use C{matchPreviousExpr}. 
3184         Do *not* use with packrat parsing enabled. 
3185      """ 
3186      rep = Forward() 
3187      def copyTokenToRepeater(s,l,t): 
3188          if t: 
3189              if len(t) == 1: 
3190                  rep << t[0] 
3191              else: 
3192                   
3193                  tflat = _flatten(t.asList()) 
3194                  rep << And( [ Literal(tt) for tt in tflat ] ) 
3195          else: 
3196              rep << Empty() 
 3197      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
3198      return rep 
3199   
3201      """Helper to define an expression that is indirectly defined from 
3202         the tokens matched in a previous expression, that is, it looks 
3203         for a 'repeat' of a previous expression.  For example:: 
3204             first = Word(nums) 
3205             second = matchPreviousExpr(first) 
3206             matchExpr = first + ":" + second 
3207         will match C{"1:1"}, but not C{"1:2"}.  Because this matches by 
3208         expressions, will *not* match the leading C{"1:1"} in C{"1:10"}; 
3209         the expressions are evaluated first, and then compared, so 
3210         C{"1"} is compared with C{"10"}. 
3211         Do *not* use with packrat parsing enabled. 
3212      """ 
3213      rep = Forward() 
3214      e2 = expr.copy() 
3215      rep << e2 
3216      def copyTokenToRepeater(s,l,t): 
3217          matchTokens = _flatten(t.asList()) 
3218          def mustMatchTheseTokens(s,l,t): 
3219              theseTokens = _flatten(t.asList()) 
3220              if  theseTokens != matchTokens: 
3221                  raise ParseException("",0,"") 
 3222          rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) 
3223      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
3224      return rep 
3225   
3227       
3228      for c in r"\^-]": 
3229          s = s.replace(c,_bslash+c) 
3230      s = s.replace("\n",r"\n") 
3231      s = s.replace("\t",r"\t") 
3232      return _ustr(s) 
 3233   
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       If C{strs} is neither a string nor a list/tuple, a C{SyntaxWarning} is issued
       and the argument is treated as an empty collection of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with an empty symbol list instead of failing below on an
        # unbound local name
        symbols = []

    # Reorder so that no literal is hidden by an earlier literal that is a
    # prefix of it, and drop duplicates.  Whenever the list is modified, the
    # scan restarts at the same index i.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # duplicate of cur - discard the later copy
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other - move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    # an empty symbol list would only produce the invalid pattern "[]"
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class is enough
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # best effort only: fall back to MatchFirst, but do not swallow
            # KeyboardInterrupt or SystemExit
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
 3292   
3294      """Helper to easily and clearly define a dictionary by specifying the respective patterns 
3295         for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens 
3296         in the proper order.  The key pattern can include delimiting markers or punctuation, 
3297         as long as they are suppressed, thereby leaving the significant key text.  The value 
3298         pattern can include named results, so that the C{Dict} results can include named token 
3299         fields. 
3300      """ 
3301      return Dict( ZeroOrMore( Group ( key + value ) ) ) 
 3302   
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    def reportLocation(s,loc,t):
        # the only token produced by a marker is the current parse location
        return loc

    startMarker = Empty().setParseAction(reportLocation)
    endMarker = startMarker.copy()
    # NOTE(review): presumably keeps the end marker from skipping ahead before
    # it records its location - confirm against ParserElement
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            # replace all tokens with the matched slice of the input string
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # drop the tokens but keep the results names, then put the matched
            # slice of the input back as the single token
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            # the two helper names must not leak into the caller's results
            del t["_original_start"]
            del t["_original_end"]

    wrapped.setParseAction(extractText)
    return wrapped
3331   
3333      """Helper to undo pyparsing's default grouping of And expressions, even 
3334         if all but one are non-empty.""" 
3335      return TokenConverter(expr).setParseAction(lambda t:t[0]) 
3336   
3337   
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# building blocks of the small grammar used to parse regex-style '[...]' ranges
# an escaped punctuation character yields the character following the backslash
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# Remove the matched '\x', '\X', '\0x' or '\0X' prefix as a prefix.  lstrip()
# strips a *set* of characters, so it also ate leading '0' digits of the value
# ('\x00' became '') and left an upper-case 'X' in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]","",t[0]),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a grouped range (ParseResults holding the two endpoints) expands to every
# character between them, inclusive; anything else is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3353   
3355      r"""Helper to easily define string ranges for use in Word construction.  Borrows 
3356         syntax from regexp '[]' string range definitions:: 
3357            srange("[0-9]")   -> "0123456789" 
3358            srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz" 
3359            srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 
3360         The input string must be enclosed in []'s, and the returned string is the expanded 
3361         character set joined into a single string. 
3362         The values enclosed in the []'s may be:: 
3363            a single character 
3364            an escaped character with a leading backslash (such as \- or \]) 
3365            an escaped hex character with a leading '\x' (\x21, which is a '!' character)  
3366              (\0x## is also supported for backwards compatibility)  
3367            an escaped octal character with a leading '\0' (\041, which is a '!' character) 
3368            a range of any of the above, separated by a dash ('a-z', etc.) 
3369            any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 
3370      """ 
3371      try: 
3372          return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) 
3373      except: 
3374          return "" 
 3375   
3377      """Helper method for defining parse actions that require matching at a specific 
3378         column in the input text. 
3379      """ 
3380      def verifyCol(strg,locn,toks): 
3381          if col(locn,strg) != n: 
3382              raise ParseException(strg,locn,"matched token not at column %d" % n) 
 3383      return verifyCol 
3384   
3386      """Helper method for common parse actions that simply return a literal value.  Especially 
3387         useful when used with C{L{transformString<ParserElement.transformString>}()}. 
3388      """ 
3389      def _replFunc(*args): 
3390          return [replStr] 
 3391      return _replFunc 
3392   
3394      """Helper parse action for removing quotation marks from parsed quoted strings. 
3395         To use, add this parse action to quoted string using:: 
3396           quotedString.setParseAction( removeQuotes ) 
3397      """ 
3398      return t[0][1:-1] 
 3399   
3401      """Helper parse action to convert tokens to upper case.""" 
3402      return [ tt.upper() for tt in map(_ustr,t) ] 
 3403   
3405      """Helper parse action to convert tokens to lower case.""" 
3406      return [ tt.lower() for tt in map(_ustr,t) ] 
 3407   
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Replaces the contents of C{t} with the slice of C{s} running from
       C{startLoc} to the end location reported by C{L{getTokensEndLoc}},
       and returns C{t}.  Raises C{ParseFatalException} if the end location
       cannot be determined, i.e. when not called as a parse action."""
    try:
        endloc = getTokensEndLoc()
    except ParseBaseException:
        # getTokensEndLoc reports misuse with a ParseFatalException, which a
        # handler for ParseException alone does not intercept; catch the
        # common base class so the message names this function
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
 3419   
3421      """Method to be called from within a parse action to determine the end 
3422         location of the parsed tokens.""" 
3423      import inspect 
3424      fstack = inspect.stack() 
3425      try: 
3426           
3427          for f in fstack[2:]: 
3428              if f[3] == "_parseNoCache": 
3429                  endloc = f[0].f_locals["loc"] 
3430                  return endloc 
3431          else: 
3432              raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") 
3433      finally: 
3434          del fstack 
 3435   
3464   
3468   
3472   
3474      """Helper to create a validating parse action to be used with start tags created 
3475         with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag 
3476         with a required attribute value, to avoid false matches on common tags such as 
3477         C{<TD>} or C{<DIV>}. 
3478   
3479         Call C{withAttribute} with a series of attribute names and values. Specify the list 
3480         of filter attributes names and values as: 
3481          - keyword arguments, as in C{(align="right")}, or 
3482          - as an explicit dict with C{**} operator, when an attribute name is also a Python 
3483            reserved word, as in C{**{"class":"Customer", "align":"right"}} 
3484          - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) 
3485         For attribute names with a namespace prefix, you must use the second form.  Attribute 
3486         names are matched insensitive to upper/lower case. 
3487   
3488         To verify that the attribute exists, but without specifying a value, pass 
3489         C{withAttribute.ANY_VALUE} as the value. 
3490         """ 
3491      if args: 
3492          attrs = args[:] 
3493      else: 
3494          attrs = attrDict.items() 
3495      attrs = [(k,v) for k,v in attrs] 
3496      def pa(s,l,tokens): 
3497          for attrName,attrValue in attrs: 
3498              if attrName not in tokens: 
3499                  raise ParseException(s,l,"no matching attribute " + attrName) 
3500              if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: 
3501                  raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % 
3502                                              (attrName, tokens[attrName], attrValue)) 
 3503      return pa 
3504  withAttribute.ANY_VALUE = object() 
3505   
3506  opAssoc = _Constants() 
3507  opAssoc.LEFT = object() 
3508  opAssoc.RIGHT = object() 
3509   
3511      """Helper method for constructing grammars of expressions made up of 
3512         operators working in a precedence hierarchy.  Operators may be unary or 
3513         binary, left- or right-associative.  Parse actions can also be attached 
3514         to operator expressions. 
3515   
3516         Parameters: 
3517          - baseExpr - expression representing the most basic element for the nested 
3518          - opList - list of tuples, one for each operator precedence level in the 
3519            expression grammar; each tuple is of the form 
3520            (opExpr, numTerms, rightLeftAssoc, parseAction), where: 
3521             - opExpr is the pyparsing expression for the operator; 
3522                may also be a string, which will be converted to a Literal; 
3523                if numTerms is 3, opExpr is a tuple of two expressions, for the 
3524                two operators separating the 3 terms 
3525             - numTerms is the number of terms for this operator (must 
3526                be 1, 2, or 3) 
3527             - rightLeftAssoc is the indicator whether the operator is 
3528                right or left associative, using the pyparsing-defined 
3529                constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. 
3530             - parseAction is the parse action to be associated with 
3531                expressions matching this operator expression (the 
3532                parse action tuple member may be omitted) 
3533      """ 
3534      ret = Forward() 
3535      lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) 
3536      for i,operDef in enumerate(opList): 
3537          opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 
3538          if arity == 3: 
3539              if opExpr is None or len(opExpr) != 2: 
3540                  raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 
3541              opExpr1, opExpr2 = opExpr 
3542          thisExpr = Forward() 
3543          if rightLeftAssoc == opAssoc.LEFT: 
3544              if arity == 1: 
3545                  matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 
3546              elif arity == 2: 
3547                  if opExpr is not None: 
3548                      matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 
3549                  else: 
3550                      matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) 
3551              elif arity == 3: 
3552                  matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 
3553                              Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) 
3554              else: 
3555                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
3556          elif rightLeftAssoc == opAssoc.RIGHT: 
3557              if arity == 1: 
3558                   
3559                  if not isinstance(opExpr, Optional): 
3560                      opExpr = Optional(opExpr) 
3561                  matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
3562              elif arity == 2: 
3563                  if opExpr is not None: 
3564                      matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) 
3565                  else: 
3566                      matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) 
3567              elif arity == 3: 
3568                  matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 
3569                              Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) 
3570              else: 
3571                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
3572          else: 
3573              raise ValueError("operator must indicate right or left associativity") 
3574          if pa: 
3575              matchExpr.setParseAction( pa ) 
3576          thisExpr << ( matchExpr | lastExpr ) 
3577          lastExpr = thisExpr 
3578      ret << lastExpr 
3579      return ret 
 3580   
3581  dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") 
3582  sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") 
3583  quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") 
3584  unicodeString = Combine(_L('u') + quotedString.copy()) 
3585   
3587      """Helper method for defining nested lists enclosed in opening and closing 
3588         delimiters ("(" and ")" are the default). 
3589   
3590         Parameters: 
3591          - opener - opening character for a nested list (default="("); can also be a pyparsing expression 
3592          - closer - closing character for a nested list (default=")"); can also be a pyparsing expression 
3593          - content - expression for items within the nested lists (default=None) 
3594          - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) 
3595   
3596         If an expression is not provided for the content argument, the nested 
3597         expression will capture all whitespace-delimited content between delimiters 
3598         as a list of separate values. 
3599   
3600         Use the C{ignoreExpr} argument to define expressions that may contain 
3601         opening or closing characters that should not be treated as opening 
3602         or closing characters for nesting, such as quotedString or a comment 
3603         expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. 
3604         The default is L{quotedString}, but if no expressions are to be ignored, 
3605         then pass C{None} for this argument. 
3606      """ 
3607      if opener == closer: 
3608          raise ValueError("opening and closing strings cannot be the same") 
3609      if content is None: 
3610          if isinstance(opener,basestring) and isinstance(closer,basestring): 
3611              if len(opener) == 1 and len(closer)==1: 
3612                  if ignoreExpr is not None: 
3613                      content = (Combine(OneOrMore(~ignoreExpr + 
3614                                      CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3615                                  ).setParseAction(lambda t:t[0].strip())) 
3616                  else: 
3617                      content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS 
3618                                  ).setParseAction(lambda t:t[0].strip())) 
3619              else: 
3620                  if ignoreExpr is not None: 
3621                      content = (Combine(OneOrMore(~ignoreExpr +  
3622                                      ~Literal(opener) + ~Literal(closer) + 
3623                                      CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3624                                  ).setParseAction(lambda t:t[0].strip())) 
3625                  else: 
3626                      content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + 
3627                                      CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3628                                  ).setParseAction(lambda t:t[0].strip())) 
3629          else: 
3630              raise ValueError("opening and closing arguments must be strings if no content expression is given") 
3631      ret = Forward() 
3632      if ignoreExpr is not None: 
3633          ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) 
3634      else: 
3635          ret << Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) ) 
3636      return ret 
 3637   
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l < len(s):
            column = col(l,s)
            expected = indentStack[-1]
            if column > expected:
                raise ParseFatalException(s,l,"illegal nesting")
            if column < expected:
                raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level - this opens a new nested block
        indentStack.append( column )

    def checkUnindent(s,l,t):
        # end of input closes the block without touching the stack
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # one or more statements, each checked to be at the current indent level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        blockExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        blockExpr = Group( Optional(NL) + statements )

    # registered only after the block expression has been assembled, exactly
    # as before: allow a backslash at end of line inside a statement
    blockStatementExpr.ignore(_bslash + LineEnd())
    return blockExpr
3689   
3690  alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 
3691  punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 
3692   
3693  anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) 
3694  commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() 
3695  _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) 
3696  replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None 
3697   
3698   
3699  cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") 
3700   
3701  htmlComment = Regex(r"<!--[\s\S]*?-->") 
3702  restOfLine = Regex(r".*").leaveWhitespace() 
3703  dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") 
3704  cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment") 
3705   
3706  javaStyleComment = cppStyleComment 
3707  pythonStyleComment = Regex(r"#.*").setName("Python style comment") 
3708  _noncomma = "".join( [ c for c in printables if c != "," ] ) 
3709  _commasepitem = Combine(OneOrMore(Word(_noncomma) + 
3710                                    Optional( Word(" \t") + 
3711                                              ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") 
3712  commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") 
3713   
3714   
if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with the simpleSQL grammar defined below and print
           the resulting tokens, or the location and text of the parse error."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->"   + str(tokenlist))
            print ("tokens = "         + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = "  + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() keeps this handler valid on both Python 2 and 3
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # blank separator line; a bare print() would print "()" when print is
        # a statement
        print ("")

    # minimal SELECT grammar used to exercise the module
    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # same statements, in the same order, as the original sequence of calls
    for statement in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2   ",
            ):
        test( statement )
3758