"""
Utility library for validating IRIs against the grammar defined in RFC 3987
"""
__author__ = 'VMware, Inc'
__copyright__ = 'Copyright (c) 2015 VMware, Inc.  All rights reserved.'
import logging
import re
import ast
from vmware.vapi.exception import CoreException
from vmware.vapi.l10n.runtime import message_factory
logger = logging.getLogger(__name__)
class URIValidator(object):
    """
    Helper class for validation of IRIs (RFC 3987).

    The grammar rules are expanded into two regular expressions while the
    class body executes. The compiled patterns are stored in
    ``compiled_regex`` under the keys ``'IRI'`` (absolute IRI with optional
    fragment) and ``'IRI_reference'`` (absolute or relative reference).
    """
    # The regex is taken from RFC 3987 (IRI)
    # Note 1: RFC 3987 (IRI) uses HEXDIG.
    #         HEXDIG, as specified in RFC 2234, is [0-9A-F] only, while
    #         RFC 2396 also allows the lowercase letters [a-f].
    #         So, in this regex, HEXDIG uses the RFC 2396 standard.
    # Note 2: Each rule may reference, as ``{name}``, only rules that appear
    #         AFTER it in this tuple; the expansion below walks the tuple in
    #         reverse so every placeholder is already resolved when needed.
    #         Literal regex braces are therefore written doubled (``{{3}}``).
    # NOTE(review): 'ucschar' and 'iprivate' only cover the Basic Multilingual
    #         Plane; RFC 3987 also lists supplementary-plane ranges. Left
    #         unchanged here -- confirm whether that restriction is intended.
    _rules = (
        ('IRI_reference', r"(?:{IRI}|{irelative_ref})"),
        ('IRI', r"{absolute_IRI}(?:\#{ifragment})?"),
        ('absolute_IRI', r"{scheme}:{ihier_part}(?:\?{iquery})?"),
        ('irelative_ref', (r"(?:{irelative_part}"
                           r"(?:\?{iquery})?(?:\#{ifragment})?)")),
        ('ihier_part', (r"(?://{iauthority}{ipath_abempty}"
                        r"|{ipath_absolute}|{ipath_rootless}|{ipath_empty})")),
        ('irelative_part', (r"(?://{iauthority}{ipath_abempty}"
                            r"|{ipath_absolute}|{ipath_noscheme}|{ipath_empty})")),
        ('iauthority', r"(?:{iuserinfo}@)?{ihost}(?::{port})?"),
        ('iuserinfo', r"(?:{iunreserved}|{pct_encoded}|{sub_delims}|:)*"),
        ('ihost', r"(?:{IP_literal}|{IPv4address}|{ireg_name})"),
        ('ireg_name', r"(?:{iunreserved}|{pct_encoded}|{sub_delims})*"),
        ('ipath', (r"(?:{ipath_abempty}|{ipath_absolute}|{ipath_noscheme}"
                   r"|{ipath_rootless}|{ipath_empty})")),
        ('ipath_empty', r""),
        ('ipath_rootless', r"{isegment_nz}(?:/{isegment})*"),
        ('ipath_noscheme', r"{isegment_nz_nc}(?:/{isegment})*"),
        ('ipath_absolute', r"/(?:{isegment_nz}(?:/{isegment})*)?"),
        ('ipath_abempty', r"(?:/{isegment})*"),
        ('isegment_nz_nc', r"(?:{iunreserved}|{pct_encoded}|{sub_delims}|@)+"),
        ('isegment_nz', r"{ipchar}+"),
        ('isegment', r"{ipchar}*"),
        ('iquery', r"(?:{ipchar}|{iprivate}|/|\?)*"),
        ('ifragment', r"(?:{ipchar}|/|\?)*"),
        ('ipchar', r"(?:{iunreserved}|{pct_encoded}|{sub_delims}|:|@)"),
        ('iunreserved', r"(?:[a-zA-Z0-9._~-]|{ucschar})"),
        ('iprivate', r"[\uE000-\uF8FF]"),
        ('ucschar', (r"[\xA0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]")),
        ('scheme', r"[a-zA-Z][a-zA-Z0-9+.-]*"),
        ('port', r"[0-9]*"),
        ('IP_literal', r"\[(?:{IPv6address}|{IPvFuture})\]"),
        # The blanks below are layout only; they are stripped by .replace().
        ('IPv6address', (r"(?:                             (?:{h16}:){{6}} {ls32}"
                         r"|                            :: (?:{h16}:){{5}} {ls32}"
                         r"|                    {h16}?  :: (?:{h16}:){{4}} {ls32}"
                         r"| (?:(?:{h16}:)?     {h16})? :: (?:{h16}:){{3}} {ls32}"
                         r"| (?:(?:{h16}:){{,2}}{h16})? :: (?:{h16}:){{2}} {ls32}"
                         r"| (?:(?:{h16}:){{,3}}{h16})? :: (?:{h16}:)      {ls32}"
                         r"| (?:(?:{h16}:){{,4}}{h16})? ::                 {ls32}"
                         r"| (?:(?:{h16}:){{,5}}{h16})? ::                 {h16} "
                         r"| (?:(?:{h16}:){{,6}}{h16})? ::                      )"
                         ).replace(' ', '')),
        ('ls32', r"(?:{h16}:{h16}|{IPv4address})"),
        ('h16', r"[0-9A-Fa-f]{{1,4}}"),
        ('IPv4address', r"(?:{dec_octet}\.){{3}}{dec_octet}"),
        ('dec_octet', r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"),
        ('IPvFuture', r"v[0-9A-Fa-f]+\.(?:{unreserved}|{sub_delims}|:)+"),
        ('unreserved', r"[a-zA-Z0-9_.~-]"),
        ('reserved', r"(?:{gen_delims}|{sub_delims})"),
        ('pct_encoded', r"%[0-9A-Fa-f][0-9A-Fa-f]"),
        ('gen_delims', r"[:/?#[\]@]"),
        ('sub_delims', r"[!$&'()*+,;=]"),
    )

    # Use a dictionary to save the compiled regexes
    compiled_regex = {}

    # Expand every rule once: walking the tuple bottom-up guarantees that the
    # rules a template refers to have already been expanded.
    final_regex = {}
    for key, value in reversed(_rules):
        final_regex[key] = value.format(**final_regex)

    # Just compute the regex for IRIs now.
    # NOTE: the loop temporaries below stay behind as class attributes,
    # exactly as before; they are not removed to keep the class namespace
    # backward-compatible.
    for rule_type in ['IRI', 'IRI_reference']:
        # Anchor with \Z rather than $: in Python, $ also matches just before
        # a trailing newline, which would let "http://host\n" validate.
        regex_str = ''.join(['^%(', rule_type, r')s\Z'])
        regex_str = regex_str % final_regex
        # ``\u`` and ``\U`` escapes must be preprocessed
        # http://bugs.python.org/issue3665
        # The pattern also holds regex escapes (\#, \?, \., ...) that are NOT
        # valid string-literal escapes. Double every backslash that does not
        # start a \u or \x escape so the literal below contains only valid
        # escapes; evaluating it then yields the same pattern text without
        # triggering "invalid escape sequence" warnings/errors.
        regex_str = re.sub(r'\\(?![ux])', r'\\\\', regex_str)
        unicode_wrap = 'u"""{0}"""'
        regex_str = ast.literal_eval(unicode_wrap.format(regex_str))
        regex = re.compile(regex_str)
        compiled_regex[rule_type] = regex
    del _rules

    @staticmethod
    def validate(iri):
        """
        Validate the given IRI string

        :type  iri: :class:`str`
        :param iri: IRI string to be validated
        :raise: :class:`vmware.vapi.exception.CoreException`
            If the string matches neither the absolute IRI pattern nor the
            IRI reference pattern
        """
        # Input could be an absolute or relative IRI string, validate
        # it against both of them. Return as soon as one pattern accepts it.
        for iri_type in ('IRI', 'IRI_reference'):
            if URIValidator.compiled_regex[iri_type].match(iri) is not None:
                return

        msg = message_factory.get_message(
            'vapi.bindings.typeconverter.uri.invalid.format',
            repr(iri))
        logger.debug(msg)
        raise CoreException(msg)