Source code for pyparsing_regex._interface

# Pyparsing-like Interface
# ========================
try:
    import pyximport; pyximport.install()
    from pyparsing_regex._core_cython import ParserElement, Structure
    print "using cython"
except ImportError as e:
    print "NOT using cython", e
    from pyparsing_regex._core import ParserElement, Structure

import pyparsing_regex._helpers_regex as hre
import regex
import __builtin__
from copy import copy

import cPickle

def deepcopy(o):
    """Return a deep copy of ``o`` made by a pickle round trip.

    Intended as a fast alternative to ``copy.deepcopy``: the object is
    serialized with ``cPickle.dumps`` using protocol ``-1`` and loaded
    straight back, so ``o`` has to be picklable.

    :param o: picklable object to duplicate
    :return: a new object reconstructed from the pickled bytes
    """
    serialized = cPickle.dumps(o, -1)
    return cPickle.loads(serialized)

# emulate generic methods from pyparsing itself:
from pyparsing import srange




class Literal(ParserElement):
    """Parser element that matches one exact string."""

    def __init__(self, str):
        """Create the element from the literal text ``str``.

        :param str: text to match; it is passed through ``regex.escape`` so
            regex metacharacters in it are matched verbatim.  (The name
            shadows the builtin but is kept for keyword-argument callers.)
        """
        super(Literal, self).__init__(regex.escape(str))

class Regex(ParserElement):
    """Parser element defined directly by a regular expression."""

    def __init__(self, pattern, flags=0):
        """Create the element from ``pattern``.

        :param pattern: regular expression source text
        :param flags: optional regex flags.  When truthy they are converted
            by ``hre.decodeflags`` and applied as a scoped inline group
            ``(?<flags>:<pattern>)``, so they affect only the supplied
            pattern and nothing around it.
        """
        if flags:
            # Scope the flags to this pattern instead of the whole regex.
            pattern = r"(?%s:%s)" % (hre.decodeflags(flags), pattern)
        super(Regex, self).__init__(pattern)


class Word(ParserElement):
    """Parser element matching a run of characters from given character sets."""

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, excludeChars=None):
        """Translate pyparsing-style ``Word`` arguments into a regex pattern.

        The character arguments are inserted verbatim between ``[`` and
        ``]``, so they are interpreted with regex character-class syntax
        (no escaping is applied here).

        :param initChars: characters allowed at the first position and, when
            ``bodyChars`` is falsy, at every position
        :param bodyChars: characters allowed after the first position
        :param min: minimum length of the word
        :param max: maximum length of the word; ``0`` means no upper bound
        :param exact: exact length of the word; ``0`` means "not set".  When
            positive it takes precedence over ``min`` and ``max``.
        :param excludeChars: characters appended to both sets after a ``--``
            separator (NOTE(review): this looks like the set-difference
            syntax of the ``regex`` module, which presumably needs its
            VERSION1 behaviour - confirm against how patterns are compiled)
        :raises RuntimeError: if ``max`` is non-zero and ``min > max``

        Not implemented: the ``asKeyword`` keyword argument.
        """
        if max != 0 and min > max:
            raise RuntimeError("min <= max needed")

        if excludeChars:
            initChars = initChars + "--" + excludeChars
            if bodyChars:
                bodyChars = bodyChars + "--" + excludeChars

        # ``exact`` is checked before ``max`` so that a contradictory call
        # such as ``exact=3, max=1`` honours the exact length.
        if exact == 1:
            pattern = r"[%s]{1}" % (initChars)
        elif exact > 1:
            if bodyChars:
                pattern = r"[%s]{1}[%s]{%s}" % (initChars, bodyChars, exact - 1)
            else:
                pattern = r"[%s]{%s}" % (initChars, exact)
        elif max == 1:
            pattern = r"[%s]{1}" % (initChars)
        elif max > 1:
            if bodyChars:
                # One initial character is always consumed, so the body
                # bounds are shifted down by one (never below zero).
                # ``max`` the parameter shadows the builtin, hence __builtin__.
                pattern = r"[%s]{1}[%s]{%s,%s}" % (
                    initChars, bodyChars, __builtin__.max(min - 1, 0), max - 1)
            else:
                pattern = r"[%s]{%s,%s}" % (initChars, min, max)
        else:  # arbitrary upper bound
            if bodyChars:
                pattern = r"[%s]{1}[%s]{%s,}" % (
                    initChars, bodyChars, __builtin__.max(min - 1, 0))
            else:
                pattern = r"[%s]{%s,}" % (initChars, min)
        super(Word, self).__init__(pattern)

class CharsNotIn(Word):
    """Word-like element matching characters that are NOT in a given set."""

    def __init__(self, notChars, min=1, max=0, exact=0):
        """Create the element from the excluded characters.

        ``notChars`` is prefixed with ``^`` and handed to ``Word`` as its
        ``initChars``; ``min``, ``max`` and ``exact`` are forwarded
        unchanged.

        :param notChars: characters that must not occur in the match
        :param min: forwarded to ``Word``
        :param max: forwarded to ``Word``
        :param exact: forwarded to ``Word``
        """
        super(CharsNotIn, self).__init__("^%s" % notChars, min=min, max=max, exact=exact)


def _silent_pattern(expr):
    """Return a regex pattern string for ``expr``.

    :param expr: either a plain string or an object with a ``pattern``
        attribute
    :return: for a string, the ``regex.escape``-d text; otherwise
        ``expr.pattern`` with every match of
        ``hre.begins_not_silently_grouped`` replaced by ``"(?:"``
        (presumably turning group openers into non-capturing ones -
        verify against the helper's definition)
    """
    if not isinstance(expr, basestring):
        parser_pattern = expr.pattern
        return hre.begins_not_silently_grouped.sub("(?:", parser_pattern)
    return regex.escape(expr)

class SkipTo(ParserElement):
    """Element that skips over arbitrary text up to a target expression."""

    def __init__(self, expr, include_=False):
        """Build the skipping pattern for ``expr``.

        The pattern is ``(?:.(?!T))*.`` where ``T`` is the pattern returned
        by ``_silent_pattern(expr)``: it consumes characters that are not
        directly followed by the target, plus one final character, so it
        always consumes at least one character.

        :param expr: target expression; a plain string or an object with a
            ``pattern`` attribute
        :param include_: when true, the target pattern is appended so the
            target itself is matched as well

        Per the original author's note the result is grouped by default.
        Not supported (unlike pyparsing): ``ignore`` and ``failOn``.
        """
        # Compute the target pattern once; it is needed up to two times.
        target = _silent_pattern(expr)
        pattern = r"(?:.(?!%s))*." % target
        if include_:
            pattern += target
        super(SkipTo, self).__init__(pattern)


class FollowedBy(ParserElement):
    """Lookahead matching of the given parse expression."""

    def __init__(self, expr):
        """Wrap ``expr`` in a standard regex lookahead ``(?=...)``.

        A lookahead does not advance the position in the input; it only
        verifies that the expression matches at the current position.

        :param expr: expression to look ahead for; a plain string or an
            object with a ``pattern`` attribute

        The element is created with ``silent=True`` (presumably so that it
        contributes no tokens to the result - confirm in ``ParserElement``).
        """
        lookahead = r"(?=%s)" % _silent_pattern(expr)
        super(FollowedBy, self).__init__(lookahead, silent=True)

class Combine(ParserElement):
    """Converter to concatenate all matching tokens to a single string."""

    def __init__(self, expr):
        """Create the element from the silent pattern of ``expr``.

        The new element is built from ``_silent_pattern(expr)`` only.

        :param expr: expression to combine; a plain string or an object
            with a ``pattern`` attribute

        Unlike pyparsing's ``Combine`` this constructor takes no
        ``adjacent`` argument.
        """
        super(Combine, self).__init__(_silent_pattern(expr))


class Suppress(ParserElement):
    """Element that matches ``expr`` but is constructed as silent."""

    def __init__(self, expr):
        """Create the element from the silent pattern of ``expr``.

        :param expr: expression to match; a plain string or an object with
            a ``pattern`` attribute

        ``silent=True`` is passed to ``ParserElement`` (presumably so the
        matched text is left out of the results - confirm in the core).
        """
        super(Suppress, self).__init__(_silent_pattern(expr), silent=True)


class StringStart(ParserElement):
    """Matches the beginning of the text."""

    def __init__(self):
        """Create the element from the bare anchor pattern ``^``.

        No flags are attached here, so the anchor's meaning depends on the
        flags the final pattern is compiled with.
        """
        super(StringStart, self).__init__(r"^")

class StringEnd(ParserElement):
    """Matches the end of the text."""

    def __init__(self):
        """Create the element from the bare anchor pattern ``$``.

        No flags are attached here, so the anchor's meaning depends on the
        flags the final pattern is compiled with.
        """
        super(StringEnd, self).__init__(r"$")

class LineStart(Regex):
    """Matches the beginning of a line (lines delimited by newline characters)."""

    def __init__(self):
        """Create the element as ``Regex("^", regex.MULTILINE)``.

        The MULTILINE flag is handed to ``Regex`` as its ``flags`` argument.
        """
        super(LineStart, self).__init__(r"^", regex.MULTILINE)

class LineEnd(Regex):
    """Matches the end of a line."""

    def __init__(self):
        """Create the element as ``Regex("$", regex.MULTILINE)``.

        The MULTILINE flag is handed to ``Regex`` as its ``flags`` argument.
        """
        super(LineEnd, self).__init__(r"$", regex.MULTILINE)


# The remaining combinators are much easier to write as functions than as classes, so they stay functions.

def And(iterable):
    """Concatenate all expressions of ``iterable`` with ``+``.

    Original author's note: the ``__dict__`` of the first element will be
    passed through to the ``And`` result.

    The first element is modified in place: its ``__class__`` is set to
    ``ParserElement``.  The first two elements are joined with ``+`` (which
    is expected to yield a new element) and the rest are added with ``+=``
    to avoid further copies.

    :param iterable: iterable of parser expressions; must not be empty
    :return: the combined expression, or the first element itself when the
        iterable holds exactly one element
    :raises ValueError: if ``iterable`` is empty
    """
    gen = iter(iterable)
    missing = object()  # sentinel: distinguishes "exhausted" from any real item

    first = next(gen, missing)
    if first is missing:
        raise ValueError("And() needs at least one expression")
    first.__class__ = ParserElement

    second = next(gen, missing)
    if second is missing:  # only one element
        return first

    base = first + second  # once (+) to have a new element
    for expr in gen:
        base += expr  # in place addition to avoid copying
    return base


def MatchFirst(iterable):
    """Combine all expressions of ``iterable`` as alternatives with ``|``.

    Original author's note: the ``__dict__`` of the first element will be
    passed through to the ``MatchFirst`` result.

    The first element is modified in place: its ``__class__`` is set to
    ``ParserElement``.  The first two elements are joined with ``|`` (which
    is expected to yield a new element) and the rest are added with ``|=``
    to avoid further copies.

    :param iterable: iterable of parser expressions; must not be empty
    :return: the combined expression, or the first element itself when the
        iterable holds exactly one element
    :raises ValueError: if ``iterable`` is empty
    """
    gen = iter(iterable)
    missing = object()  # sentinel: distinguishes "exhausted" from any real item

    first = next(gen, missing)
    if first is missing:
        raise ValueError("MatchFirst() needs at least one expression")
    first.__class__ = ParserElement

    second = next(gen, missing)
    if second is missing:  # only one element
        return first

    base = first | second  # once (|) to have a new element
    for expr in gen:
        base |= expr  # in place or to avoid copying
    return base

# Or (__xor__) and Each (__and__) are still missing; they take more time to implement.

def Optional(expr, default=None):
    """Return a copy of ``expr`` whose pattern is made optional with ``?``.

    A shallow copy of ``expr`` is taken, its ``__class__`` is set to
    ``ParserElement`` and its pattern is replaced by
    ``hre.ensure_grouping(pattern)`` followed by ``?``.  ``expr`` itself is
    left untouched.

    :param expr: parser expression to make optional
    :param default: when not ``None`` it is assigned to
        ``Structure.EMPTY_DEFAULT``.  NOTE: that attribute lives on the
        shared ``Structure`` object, not on the returned expression, so the
        assignment is global rather than specific to this ``Optional``.
    :return: the modified copy
    """
    if default is not None:
        Structure.EMPTY_DEFAULT = default
    optional = copy(expr)
    optional.__class__ = ParserElement
    optional.pattern = r"%s?" % hre.ensure_grouping(optional.pattern)
    return optional

def Group(expr):
    """Return a grouped deep copy of ``expr``.

    The copy's ``__class__`` is set to ``ParserElement`` and its
    ``group(silent=True)`` method is called; ``expr`` itself is not
    modified.

    :param expr: parser expression to group (must be picklable, because
        the copy is made with the module's pickle-based ``deepcopy``)
    :return: the grouped copy
    """
    grouped = deepcopy(expr)
    grouped.__class__ = ParserElement
    grouped.group(silent=True)
    return grouped

def GroupLiftKeys(expr):
    """Return a grouped deep copy of ``expr`` with ``liftkeys`` enabled.

    The copy's ``__class__`` is set to ``ParserElement`` and its
    ``group(silent=True, liftkeys=True)`` method is called; ``expr``
    itself is not modified.

    :param expr: parser expression to group (must be picklable, because
        the copy is made with the module's pickle-based ``deepcopy``)
    :return: the grouped copy
    """
    grouped = deepcopy(expr)
    grouped.__class__ = ParserElement
    grouped.group(silent=True, liftkeys=True)
    return grouped

def OneOrMore(expr):
    """Return ``Repeat(expr, min=1)``: ``expr`` repeated at least once.

    :param expr: parser expression to repeat
    :return: the repeated expression produced by ``Repeat``
    """
    return Repeat(expr, min=1)

def ZeroOrMore(expr):
    """Return ``Repeat(expr)`` with its default bounds (``min=0``, no ``max``).

    :param expr: parser expression to repeat
    :return: the repeated expression produced by ``Repeat``
    """
    return Repeat(expr)

def Repeat(expr, min=0, max=None):
    """Return a repeated deep copy of ``expr``.

    The copy's ``__class__`` is set to ``ParserElement`` and its
    ``repeat(min=min, max=max)`` method is called; the object passed in is
    not modified.

    :param expr: parser expression to repeat (must be picklable, because
        the copy is made with the module's pickle-based ``deepcopy``)
    :param min: lower bound forwarded to ``repeat``
    :param max: upper bound forwarded to ``repeat``; defaults to ``None``
    :return: the repeated copy
    """
    repeated = deepcopy(expr)
    repeated.__class__ = ParserElement
    repeated.repeat(min=min, max=max)
    return repeated





# my own extras:
# ==============

def setResultsNameInPlace(expr, name, listAllMatches=False):
    """Set the results name on ``expr`` itself and return ``expr``.

    Calls ``expr.setResultsName(name)`` and returns the very object that
    was passed in; whatever ``setResultsName`` returns is discarded.

    :param expr: parser to set resultsname
    :param name: resultsname
    :param listAllMatches: whether string matches should all be listed, or
        only the last match should be kept.  NOTE: accepted for signature
        compatibility but currently not used by this function.
    :return: ``expr``
    """
    expr.setResultsName(name)
    return expr