whois/parser.py
changeset 12 c57439b500cb
parent 11 5083c26d8f93
child 13 f8d7b881701d
equal deleted inserted replaced
11:5083c26d8f93 12:c57439b500cb
       
     1 # parser.py - Module for parsing whois response data
       
     2 # Copyright (c) 2008 Andrey Petrov
       
     3 #
       
     4 # This module is part of pywhois and is released under
       
     5 # the MIT license: http://www.opensource.org/licenses/mit-license.php
       
     6 
       
     7 import re
       
     8 from datetime import datetime
       
     9    
       
    10 
       
    11 class PywhoisError(Exception):
       
    12     pass
       
    13 
       
    14 
       
    15 def cast_date(s):
       
    16     """Convert any date string found in WHOIS to a datetime object.
       
    17     """
       
    18     known_formats = [
       
    19         '%d-%b-%Y', 				# 02-jan-2000
       
    20         '%Y-%m-%d', 				# 2000-01-02
       
    21         '%d.%m.%Y', 				# 2000-01-02
       
    22         '%Y.%m.%d',                 # 2000.01.02
       
    23         '%Y/%m/%d',                 # 2000/01/02
       
    24         '%d-%b-%Y %H:%M:%S %Z',		# 24-Jul-2009 13:20:03 UTC
       
    25         '%a %b %d %H:%M:%S %Z %Y',  # Tue Jun 21 23:59:59 GMT 2011
       
    26         '%Y-%m-%dT%H:%M:%SZ',       # 2007-01-26T19:10:31Z
       
    27     ]
       
    28 
       
    29     for known_format in known_formats:
       
    30         try:
       
    31             return datetime.strptime(s.strip(), known_format)
       
    32         except ValueError as e:
       
    33             pass # Wrong format, keep trying
       
    34     return s
       
    35 
       
    36 
       
    37 class WhoisEntry(object):
       
    38     """Base class for parsing a Whois entries.
       
    39     """
       
    40     # regular expressions to extract domain data from whois profile
       
    41     # child classes will override this
       
    42     _regex = {
       
    43         'domain_name':      'Domain Name:\s?(.+)',
       
    44         'registrar':        'Registrar:\s?(.+)',
       
    45         'whois_server':     'Whois Server:\s?(.+)',
       
    46         'referral_url':     'Referral URL:\s?(.+)', # http url of whois_server
       
    47         'updated_date':     'Updated Date:\s?(.+)',
       
    48         'creation_date':    'Creation Date:\s?(.+)',
       
    49         'expiration_date':  'Expiration Date:\s?(.+)',
       
    50         'name_servers':     'Name Server:\s?(.+)', # list of name servers
       
    51         'status':           'Status:\s?(.+)', # list of statuses
       
    52         'emails':           '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses
       
    53     }
       
    54 
       
    55     def __init__(self, domain, text, regex=None):
       
    56         self.domain = domain
       
    57         self.text = text
       
    58         if regex is not None:
       
    59             self._regex = regex
       
    60 
       
    61 
       
    62     def __getattr__(self, attr):
       
    63         """The first time an attribute is called it will be calculated here.
       
    64         The attribute is then set to be accessed directly by subsequent calls.
       
    65         """
       
    66         whois_regex = self._regex.get(attr)
       
    67         if whois_regex:
       
    68             values = re.findall(whois_regex, self.text, re.IGNORECASE)
       
    69             # try casting to date format
       
    70             values = [cast_date(value.strip()) for value in values]
       
    71             if len(values) == 1:
       
    72                 values = values[0]
       
    73             setattr(self, attr, values)
       
    74             return getattr(self, attr)
       
    75         else:
       
    76             raise KeyError('Unknown attribute: %s' % attr)
       
    77 
       
    78     def __str__(self):
       
    79         """Print all whois properties of domain
       
    80         """
       
    81         return '\n'.join('%s: %s' % (attr, str(getattr(self, attr))) for attr in self.attrs())
       
    82 
       
    83 
       
    84     def attrs(self):
       
    85         """Return list of attributes that can be extracted for this domain
       
    86         """
       
    87         return sorted(self._regex.keys())
       
    88 
       
    89 
       
    90     @staticmethod
       
    91     def load(domain, text):
       
    92         """Given whois output in ``text``, return an instance of ``WhoisEntry`` that represents its parsed contents.
       
    93         """
       
    94         if text.strip() == 'No whois server is known for this kind of object.':
       
    95             raise PywhoisError(text)
       
    96 
       
    97         if domain.endswith('.com'):
       
    98             return WhoisCom(domain, text)
       
    99         elif domain.endswith('.net'):
       
   100             return WhoisNet(domain, text)
       
   101         elif domain.endswith('.org'):
       
   102             return WhoisOrg(domain, text)
       
   103         elif domain.endswith('.name'):
       
   104         	return WhoisName(domain, text)
       
   105         elif domain.endswith('.me'):
       
   106         	return WhoisMe(domain, text)
       
   107         elif domain.endswith('.ru'):
       
   108             return WhoisRu(domain, text)
       
   109         elif domain.endswith('.us'):
       
   110         	return WhoisUs(domain, text)
       
   111         elif domain.endswith('.uk'):
       
   112         	return WhoisUk(domain, text)
       
   113         elif domain.endswith('.fr'):
       
   114             return WhoisFr(domain, text)
       
   115         elif domain.endswith('.fi'):
       
   116         	return WhoisFi(domain, text)
       
   117         elif domain.endswith('.jp'):
       
   118             return WhoisJp(domain, text)
       
   119         elif domain.endswith('.pl'):
       
   120             return WhoisPl(domain, text)
       
   121         else:
       
   122             return WhoisEntry(domain, text)
       
   123 
       
   124 
       
   125 
       
   126 class WhoisCom(WhoisEntry):
       
   127     """Whois parser for .com domains
       
   128     """
       
   129     def __init__(self, domain, text):
       
   130         if 'No match for "' in text:
       
   131             raise PywhoisError(text)
       
   132         else:
       
   133             WhoisEntry.__init__(self, domain, text) 
       
   134 
       
   135 
       
   136 class WhoisNet(WhoisEntry):
       
   137     """Whois parser for .net domains
       
   138     """
       
   139     def __init__(self, domain, text):
       
   140         if 'No match for "' in text:
       
   141             raise PywhoisError(text)
       
   142         else:
       
   143             WhoisEntry.__init__(self, domain, text) 
       
   144 
       
   145 
       
   146 class WhoisOrg(WhoisEntry):
       
   147     """Whois parser for .org domains
       
   148     """
       
   149     def __init__(self, domain, text):
       
   150         if text.strip() == 'NOT FOUND':
       
   151             raise PywhoisError(text)
       
   152         else:
       
   153             WhoisEntry.__init__(self, domain, text) 
       
   154 
       
   155 
       
   156 class WhoisRu(WhoisEntry):
       
   157     """Whois parser for .ru domains
       
   158     """
       
   159     regex = {
       
   160         'domain_name': 'domain:\s*(.+)',
       
   161         'registrar': 'registrar:\s*(.+)',
       
   162         'creation_date': 'created:\s*(.+)',
       
   163         'expiration_date': 'paid-till:\s*(.+)',
       
   164         'name_servers': 'nserver:\s*(.+)',  # list of name servers
       
   165         'status': 'state:\s*(.+)',  # list of statuses
       
   166         'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
       
   167     }
       
   168 
       
   169     def __init__(self, domain, text):
       
   170         if text.strip() == 'No entries found':
       
   171             raise PywhoisError(text)
       
   172         else:
       
   173             WhoisEntry.__init__(self, domain, text, self.regex)
       
   174 
       
   175 
       
   176 class WhoisName(WhoisEntry):
       
   177     """Whois parser for .name domains
       
   178     """
       
   179     regex = {
       
   180     	'domain_name_id':  'Domain Name ID:\s*(.+)',
       
   181         'domain_name':     'Domain Name:\s*(.+)',
       
   182         'registrar_id':    'Sponsoring Registrar ID:\s*(.+)',
       
   183         'registrar':       'Sponsoring Registrar:\s*(.+)',
       
   184         'registrant_id':   'Registrant ID:\s*(.+)',
       
   185         'admin_id':        'Admin ID:\s*(.+)',
       
   186         'technical_id':    'Tech ID:\s*(.+)',
       
   187         'billing_id':      'Billing ID:\s*(.+)',
       
   188         'creation_date':   'Created On:\s*(.+)',
       
   189         'expiration_date': 'Expires On:\s*(.+)',
       
   190         'updated_date':    'Updated On:\s*(.+)',
       
   191         'name_server_ids': 'Name Server ID:\s*(.+)',  # list of name server ids
       
   192         'name_servers':    'Name Server:\s*(.+)',  # list of name servers
       
   193         'status':          'Domain Status:\s*(.+)',  # list of statuses
       
   194 	}
       
   195     def __init__(self, domain, text):
       
   196         if 'No match.' in text:
       
   197             raise PywhoisError(text)
       
   198         else:
       
   199             WhoisEntry.__init__(self, domain, text, self.regex) 
       
   200     
       
   201         
       
   202 class WhoisUs(WhoisEntry):
       
   203     """Whois parser for .us domains
       
   204     """
       
   205     regex = {
       
   206         'domain_name':                    'Domain Name:\s*(.+)',
       
   207     	'domain__id':                     'Domain ID:\s*(.+)',
       
   208         'registrar':                      'Sponsoring Registrar:\s*(.+)',
       
   209         'registrar_id':                   'Sponsoring Registrar IANA ID:\s*(.+)',
       
   210         'registrar_url':                  'Registrar URL \(registration services\):\s*(.+)',        
       
   211         'status':                         'Domain Status:\s*(.+)',  # list of statuses
       
   212         'registrant_id':                  'Registrant ID:\s*(.+)',
       
   213         'registrant_name':                'Registrant Name:\s*(.+)',
       
   214         'registrant_address1':            'Registrant Address1:\s*(.+)',
       
   215         'registrant_address2':            'Registrant Address2:\s*(.+)',
       
   216         'registrant_city':                'Registrant City:\s*(.+)',
       
   217         'registrant_state_province':      'Registrant State/Province:\s*(.+)',
       
   218         'registrant_postal_code':         'Registrant Postal Code:\s*(.+)',
       
   219         'registrant_country':             'Registrant Country:\s*(.+)',
       
   220         'registrant_country_code':        'Registrant Country Code:\s*(.+)',
       
   221         'registrant_phone_number':        'Registrant Phone Number:\s*(.+)',
       
   222         'registrant_email':               'Registrant Email:\s*(.+)',
       
   223         'registrant_application_purpose': 'Registrant Application Purpose:\s*(.+)',
       
   224         'registrant_nexus_category':      'Registrant Nexus Category:\s*(.+)',
       
   225         'admin_id':                       'Administrative Contact ID:\s*(.+)',
       
   226         'admin_name':                     'Administrative Contact Name:\s*(.+)',
       
   227         'admin_address1':                 'Administrative Contact Address1:\s*(.+)',
       
   228         'admin_address2':                 'Administrative Contact Address2:\s*(.+)',
       
   229         'admin_city':                     'Administrative Contact City:\s*(.+)',
       
   230         'admin_state_province':           'Administrative Contact State/Province:\s*(.+)',
       
   231         'admin_postal_code':              'Administrative Contact Postal Code:\s*(.+)',
       
   232         'admin_country':                  'Administrative Contact Country:\s*(.+)',
       
   233         'admin_country_code':             'Administrative Contact Country Code:\s*(.+)',
       
   234         'admin_phone_number':             'Administrative Contact Phone Number:\s*(.+)',
       
   235         'admin_email':                    'Administrative Contact Email:\s*(.+)',
       
   236         'admin_application_purpose':      'Administrative Application Purpose:\s*(.+)',
       
   237         'admin_nexus_category':           'Administrative Nexus Category:\s*(.+)',
       
   238         'billing_id':                     'Billing Contact ID:\s*(.+)',
       
   239         'billing_name':                   'Billing Contact Name:\s*(.+)',
       
   240         'billing_address1':               'Billing Contact Address1:\s*(.+)',
       
   241         'billing_address2':               'Billing Contact Address2:\s*(.+)',
       
   242         'billing_city':                   'Billing Contact City:\s*(.+)',
       
   243         'billing_state_province':         'Billing Contact State/Province:\s*(.+)',
       
   244         'billing_postal_code':            'Billing Contact Postal Code:\s*(.+)',
       
   245         'billing_country':                'Billing Contact Country:\s*(.+)',
       
   246         'billing_country_code':           'Billing Contact Country Code:\s*(.+)',
       
   247         'billing_phone_number':           'Billing Contact Phone Number:\s*(.+)',
       
   248         'billing_email':                  'Billing Contact Email:\s*(.+)',
       
   249         'billing_application_purpose':    'Billing Application Purpose:\s*(.+)',
       
   250         'billing_nexus_category':         'Billing Nexus Category:\s*(.+)',
       
   251         'tech_id':                        'Technical Contact ID:\s*(.+)',
       
   252         'tech_name':                      'Technical Contact Name:\s*(.+)',
       
   253         'tech_address1':                  'Technical Contact Address1:\s*(.+)',
       
   254         'tech_address2':                  'Technical Contact Address2:\s*(.+)',
       
   255         'tech_city':                      'Technical Contact City:\s*(.+)',
       
   256         'tech_state_province':            'Technical Contact State/Province:\s*(.+)',
       
   257         'tech_postal_code':               'Technical Contact Postal Code:\s*(.+)',
       
   258         'tech_country':                   'Technical Contact Country:\s*(.+)',
       
   259         'tech_country_code':              'Technical Contact Country Code:\s*(.+)',
       
   260         'tech_phone_number':              'Technical Contact Phone Number:\s*(.+)',
       
   261         'tech_email':                     'Technical Contact Email:\s*(.+)',
       
   262         'tech_application_purpose':       'Technical Application Purpose:\s*(.+)',
       
   263         'tech_nexus_category':            'Technical Nexus Category:\s*(.+)',
       
   264         'name_servers':                   'Name Server:\s*(.+)',  # list of name servers
       
   265         'created_by_registrar':           'Created by Registrar:\s*(.+)',
       
   266         'last_updated_by_registrar':      'Last Updated by Registrar:\s*(.+)',
       
   267         'creation_date':                  'Domain Registration Date:\s*(.+)',
       
   268         'expiration_date':                'Domain Expiration Date:\s*(.+)',
       
   269         'updated_date':                   'Domain Last Updated Date:\s*(.+)',
       
   270 	}
       
   271     def __init__(self, domain, text):
       
   272         if 'Not found:' in text:
       
   273             raise PywhoisError(text)
       
   274         else:
       
   275             WhoisEntry.__init__(self, domain, text, self.regex)
       
   276        
       
   277 
       
   278 class WhoisPl(WhoisEntry):
       
   279    """Whois parser for .uk domains
       
   280    """
       
   281    regex = {
       
   282        'domain_name':                    'DOMAIN NAME:\s*(.+)\n',
       
   283        'registrar':                      'REGISTRAR:\n\s*(.+)',
       
   284        'registrar_url':                  'URL:\s*(.+)',        # not available
       
   285        'status':                         'Registration status:\n\s*(.+)',  # not available
       
   286        'registrant_name':                'Registrant:\n\s*(.+)',   # not available
       
   287        'creation_date':                  'created:\s*(.+)\n',
       
   288        'expiration_date':                'renewal date:\s*(.+)',
       
   289        'updated_date':                   'last modified:\s*(.+)\n',
       
   290    }
       
   291    def __init__(self, domain, text):
       
   292        if 'Not found:' in text:
       
   293            raise PywhoisError(text)
       
   294        else:
       
   295            WhoisEntry.__init__(self, domain, text, self.regex)
       
   296  
       
   297     
       
   298 class WhoisMe(WhoisEntry):
       
   299     """Whois parser for .me domains
       
   300     """
       
   301     regex = {
       
   302     	'domain_id':                   'Domain ID:(.+)',
       
   303         'domain_name':                 'Domain Name:(.+)',
       
   304         'creation_date':               'Domain Create Date:(.+)',
       
   305         'updated_date':                'Domain Last Updated Date:(.+)',
       
   306         'expiration_date':             'Domain Expiration Date:(.+)',
       
   307         'transfer_date':               'Last Transferred Date:(.+)',
       
   308         'trademark_name':              'Trademark Name:(.+)',
       
   309         'trademark_country':           'Trademark Country:(.+)',
       
   310         'trademark_number':            'Trademark Number:(.+)',
       
   311         'trademark_application_date':  'Date Trademark Applied For:(.+)',
       
   312         'trademark_registration_date': 'Date Trademark Registered:(.+)',
       
   313         'registrar':                   'Sponsoring Registrar:(.+)',
       
   314         'created_by':                  'Created by:(.+)',
       
   315         'updated_by':                  'Last Updated by Registrar:(.+)',
       
   316         'status':                      'Domain Status:(.+)',  # list of statuses
       
   317         'registrant_id':               'Registrant ID:(.+)',
       
   318         'registrant_name':             'Registrant Name:(.+)',
       
   319         'registrant_org':              'Registrant Organization:(.+)',
       
   320         'registrant_address':          'Registrant Address:(.+)',
       
   321         'registrant_address2':         'Registrant Address2:(.+)',
       
   322         'registrant_address3':         'Registrant Address3:(.+)',
       
   323         'registrant_city':             'Registrant City:(.+)',
       
   324         'registrant_state_province':   'Registrant State/Province:(.+)',
       
   325         'registrant_country':          'Registrant Country/Economy:(.+)',
       
   326         'registrant_postal_code':      'Registrant Postal Code:(.+)',
       
   327         'registrant_phone':            'Registrant Phone:(.+)',
       
   328         'registrant_phone_ext':        'Registrant Phone Ext\.:(.+)',
       
   329         'registrant_fax':              'Registrant FAX:(.+)',
       
   330         'registrant_fax_ext':          'Registrant FAX Ext\.:(.+)',
       
   331         'registrant_email':            'Registrant E-mail:(.+)',
       
   332         'admin_id':                    'Admin ID:(.+)',
       
   333         'admin_name':                  'Admin Name:(.+)',
       
   334         'admin_org':                   'Admin Organization:(.+)',
       
   335         'admin_address':               'Admin Address:(.+)',
       
   336         'admin_address2':              'Admin Address2:(.+)',
       
   337         'admin_address3':              'Admin Address3:(.+)',
       
   338         'admin_city':                  'Admin City:(.+)',
       
   339         'admin_state_province':        'Admin State/Province:(.+)',
       
   340         'admin_country':               'Admin Country/Economy:(.+)',
       
   341         'admin_postal_code':           'Admin Postal Code:(.+)',
       
   342         'admin_phone':                 'Admin Phone:(.+)',
       
   343         'admin_phone_ext':             'Admin Phone Ext\.:(.+)',
       
   344         'admin_fax':                   'Admin FAX:(.+)',
       
   345         'admin_fax_ext':               'Admin FAX Ext\.:(.+)',
       
   346         'admin_email':                 'Admin E-mail:(.+)',
       
   347         'tech_id':                     'Tech ID:(.+)',
       
   348         'tech_name':                   'Tech Name:(.+)',
       
   349         'tech_org':                    'Tech Organization:(.+)',
       
   350         'tech_address':                'Tech Address:(.+)',
       
   351         'tech_address2':               'Tech Address2:(.+)',
       
   352         'tech_address3':               'Tech Address3:(.+)',
       
   353         'tech_city':                   'Tech City:(.+)',
       
   354         'tech_state_province':         'Tech State/Province:(.+)',
       
   355         'tech_country':                'Tech Country/Economy:(.+)',
       
   356         'tech_postal_code':            'Tech Postal Code:(.+)',
       
   357         'tech_phone':                  'Tech Phone:(.+)',
       
   358         'tech_phone_ext':              'Tech Phone Ext\.:(.+)',
       
   359         'tech_fax':                    'Tech FAX:(.+)',
       
   360         'tech_fax_ext':                'Tech FAX Ext\.:(.+)',
       
   361         'tech_email':                  'Tech E-mail:(.+)',
       
   362         'name_servers':                'Nameservers:(.+)',  # list of name servers
       
   363 	}
       
   364     def __init__(self, domain, text):
       
   365         if 'NOT FOUND' in text:
       
   366             raise PywhoisError(text)
       
   367         else:
       
   368             WhoisEntry.__init__(self, domain, text, self.regex) 
       
   369 
       
   370 
       
   371 class WhoisUk(WhoisEntry):
       
   372     """Whois parser for .uk domains
       
   373     """
       
   374     regex = {
       
   375         'domain_name':                    'Domain name:\n\s*(.+)',
       
   376         'registrar':                      'Registrar:\n\s*(.+)',
       
   377         'registrar_url':                  'URL:\s*(.+)',
       
   378         'status':                         'Registration status:\n\s*(.+)',  # list of statuses
       
   379         'registrant_name':                'Registrant:\n\s*(.+)',
       
   380         'creation_date':                  'Registered on:\s*(.+)',
       
   381         'expiration_date':                'Expiry date:\s*(.+)',
       
   382         'updated_date':                   'Last updated:\s*(.+)',
       
   383         'name_servers':                   'Name servers:\s*(.+)',
       
   384 	}
       
   385     def __init__(self, domain, text):
       
   386         if 'Not found:' in text:
       
   387             raise PywhoisError(text)
       
   388         else:
       
   389             WhoisEntry.__init__(self, domain, text, self.regex)
       
   390 
       
   391 
       
   392 class WhoisFr(WhoisEntry):
       
   393     """Whois parser for .fr domains
       
   394     """
       
   395     regex = {
       
   396         'domain_name': 'domain:\s*(.+)',
       
   397         'registrar': 'registrar:\s*(.+)',
       
   398         'creation_date': 'created:\s*(.+)',
       
   399         'expiration_date': 'anniversary:\s*(.+)',
       
   400         'name_servers': 'nserver:\s*(.+)',  # list of name servers
       
   401         'status': 'status:\s*(.+)',  # list of statuses
       
   402         'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
       
   403         'updated_date': 'last-update:\s*(.+)',
       
   404     }
       
   405 
       
   406     def __init__(self, domain, text):
       
   407         if text.strip() == 'No entries found':
       
   408             raise PywhoisError(text)
       
   409         else:
       
   410             WhoisEntry.__init__(self, domain, text, self.regex)
       
   411 
       
   412 
       
   413 class WhoisFi(WhoisEntry):
       
   414     """Whois parser for .fi domains
       
   415     """
       
   416     regex = {
       
   417         'domain_name':                    'domain:\s*([\S]+)',
       
   418         'registrant_name':                'descr:\s*([\S\ ]+)',
       
   419         'registrant_address':             'address:\s*([\S\ ]+)',
       
   420         'registrant_phone':               'phone:\s*([\S\ ]+)',
       
   421         'status':                         'status:\s*([\S]+)',  # list of statuses
       
   422         'creation_date':                  'created:\s*([\S]+)',
       
   423         'updated_date':                   'modified:\s*([\S]+)',
       
   424         'expiration_date':                'expires:\s*([\S]+)',
       
   425         'name_servers':                   'nserver:\s*([\S]+) \[(\S+)\]',  # list of name servers
       
   426         'dnssec':                   'dnssec:\s*([\S]+)',  # list of name servers
       
   427 	}
       
   428     def __init__(self, domain, text):
       
   429         if 'Not found:' in text:
       
   430             raise PywhoisError(text)
       
   431         else:
       
   432             WhoisEntry.__init__(self, domain, text, self.regex)
       
   433 
       
   434 
       
   435 class WhoisJp(WhoisEntry):
       
   436     """Whois parser for .jp domains
       
   437     """
       
   438     regex = {
       
   439         'domain_name': 'a\. \[Domain Name\]\s*(.+)',
       
   440         'registrant_org': 'g\. \[Organization\](.+)',
       
   441         'creation_date': r'\[Registered Date\]\s*(.+)',
       
   442         'name_servers': 'p\. \[Name Server\]\s*(.+)',  # list of name servers
       
   443         'updated_date':  '\[Last Update\]\s?(.+)',
       
   444         'status': '\[State\]\s*(.+)',  # list of statuses
       
   445     }
       
   446 
       
   447     def __init__(self, domain, text):
       
   448         if text.strip() == 'No entries found':
       
   449             raise PywhoisError(text)
       
   450         else:
       
   451             WhoisEntry.__init__(self, domain, text, self.regex)