whois/parser.py
changeset 43 f7bf8d6f0547
parent 42 d187963bb7e9
child 44 5cd71f1dc42b
equal deleted inserted replaced
42:d187963bb7e9 43:f7bf8d6f0547
    49         except ValueError as e:
    49         except ValueError as e:
    50             pass  # Wrong format, keep trying
    50             pass  # Wrong format, keep trying
    51     return s
    51     return s
    52 
    52 
    53 
    53 
    54 def cast_date(s):
    54 def cast_date(s, dayfirst=False, yearfirst=False):
    55     """Convert any date string found in WHOIS to a datetime object.
    55     """Convert any date string found in WHOIS to a datetime object.
    56     """
    56     """
    57     if DATEUTIL:
    57     if DATEUTIL:
    58         try:
    58         try:
    59             return dp.parse(s.strip(), tzinfos=tz_data).replace(tzinfo=None)
    59             return dp.parse(
       
    60                 s.strip(),
       
    61                 tzinfos=tz_data,
       
    62                 dayfirst=dayfirst,
       
    63                 yearfirst=yearfirst
       
    64             ).replace(tzinfo=None)
    60         except Exception:
    65         except Exception:
    61             return datetime_parse(s)
    66             return datetime_parse(s)
    62     else:
    67     else:
    63         return datetime_parse(s)
    68         return datetime_parse(s)
    64 
    69 
    79         'name_servers':     'Name Server:\s?(.+)',  # list of name servers
    84         'name_servers':     'Name Server:\s?(.+)',  # list of name servers
    80         'status':           'Status:\s?(.+)',  # list of statuses
    85         'status':           'Status:\s?(.+)',  # list of statuses
    81         'emails':           '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    86         'emails':           '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    82         'dnssec':           'dnssec:\s*([\S]+)',
    87         'dnssec':           'dnssec:\s*([\S]+)',
    83     }
    88     }
       
    89     dayfirst = False
       
    90     yearfirst = False
    84 
    91 
    85     def __init__(self, domain, text, regex=None):
    92     def __init__(self, domain, text, regex=None):
    86         self.domain = domain
    93         self.domain = domain
    87         self.text = text
    94         self.text = text
    88         if regex is not None:
    95         if regex is not None:
    96         if whois_regex:
   103         if whois_regex:
    97             values = []
   104             values = []
    98             for value in re.findall(whois_regex, self.text, re.IGNORECASE):
   105             for value in re.findall(whois_regex, self.text, re.IGNORECASE):
    99                 if isinstance(value, basestring):
   106                 if isinstance(value, basestring):
   100                     # try casting to date format
   107                     # try casting to date format
   101                     value = cast_date(value.strip())
   108                     value = cast_date(value.strip(),
       
   109                                       dayfirst=self.dayfirst,
       
   110                                       yearfirst=self.yearfirst)
   102                 if value and value not in values:
   111                 if value and value not in values:
   103                     # avoid duplicates
   112                     # avoid duplicates
   104                     values.append(value)
   113                     values.append(value)
   105             if len(values) == 1:
   114             if len(values) == 1:
   106                 values = values[0]
   115                 values = values[0]
   638 
   647 
   639     regex = {
   648     regex = {
   640         'expiration_date': 'expires at:\s*(.+)',
   649         'expiration_date': 'expires at:\s*(.+)',
   641     }
   650     }
   642 
   651 
       
   652     dayfirst = True
       
   653 
   643     def __init__(self, domain, text):
   654     def __init__(self, domain, text):
   644         if text.strip() == 'No entries found':
   655         if text.strip() == 'No entries found':
   645             raise PywhoisError(text)
   656             raise PywhoisError(text)
   646         else:
   657         else:
   647             WhoisEntry.__init__(self, domain, text, self.regex)
   658             WhoisEntry.__init__(self, domain, text, self.regex)