whois/parser.py
changeset 47 fb36b7288fe9
parent 46 b3862a45fdad
child 48 98b1f0c6ded1
equal deleted inserted replaced
46:b3862a45fdad 47:fb36b7288fe9
    71     """Base class for parsing Whois entries.
    71     """Base class for parsing Whois entries.
    72     """
    72     """
    73     # regular expressions to extract domain data from whois profile
    73     # regular expressions to extract domain data from whois profile
    74     # child classes will override this
    74     # child classes will override this
    75     _regex = {
    75     _regex = {
    76         'domain_name':      'Domain Name:\s?(.+)',
    76         'domain_name':          'Domain Name:\s?(.+)',
    77         'registrar':        'Registrar:\s?(.+)',
    77         'registrar':            'Registrar:\s?(.+)',
    78         'whois_server':     'Whois Server:\s?(.+)',
    78         'whois_server':         'Whois Server:\s?(.+)',
    79         'referral_url':     'Referral URL:\s?(.+)',  # http url of whois_server
    79         'referral_url':         'Referral URL:\s?(.+)',  # http url of whois_server
    80         'updated_date':     'Updated Date:\s?(.+)',
    80         'updated_date':         'Updated Date:\s?(.+)',
    81         'creation_date':    'Creation Date:\s?(.+)',
    81         'creation_date':        'Creation Date:\s?(.+)',
    82         'expiration_date':  'Expir\w+ Date:\s?(.+)',
    82         'expiration_date':      'Expir\w+ Date:\s?(.+)',
    83         'name_servers':     'Name Server:\s?(.+)',  # list of name servers
    83         'name_servers':         'Name Server:\s?(.+)',  # list of name servers
    84         'status':           'Status:\s?(.+)',  # list of statuses
    84         'status':               'Status:\s?(.+)',  # list of statuses
    85         'emails':           '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    85         'emails':               '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    86         'dnssec':           'dnssec:\s*([\S]+)',
    86         'dnssec':               'dnssec:\s*([\S]+)',
       
    87         'name':                 'Registrant Name:\s*(.+)',
       
    88         'org':                  'Registrant\s*Organization:\s*(.+)',
       
    89         'address':              'Registrant Street:\s*(.+)',
       
    90         'city':                 'Registrant City:\s*(.+)',
       
    91         'state':                'Registrant State/Province:\s*(.+)',
       
    92         'zipcode':              'Registrant Postal Code:\s*(.+)',
       
    93         'country':              'Registrant Country:\s*(.+)',
    87     }
    94     }
    88     dayfirst = False
    95     dayfirst = False
    89     yearfirst = False
    96     yearfirst = False
    90 
    97 
    91     def __init__(self, domain, text, regex=None):
    98     def __init__(self, domain, text, regex=None):
    92         self.domain = domain
    99         if 'This TLD has no whois server, but you can access the whois database at' in text:
    93         self.text = text
   100             raise PywhoisError(text)
    94         if regex is not None:
   101         else:
    95             self._regex = regex
   102             self.domain = domain
       
   103             self.text = text
       
   104             if regex is not None:
       
   105                 self._regex = regex
    96 
   106 
    97     def __getattr__(self, attr):
   107     def __getattr__(self, attr):
    98         """The first time an attribute is called it will be calculated here.
   108         """The first time an attribute is called it will be calculated here.
    99         The attribute is then set to be accessed directly by subsequent calls.
   109         The attribute is then set to be accessed directly by subsequent calls.
   100         """
   110         """
   101         whois_regex = self._regex.get(attr)
   111         try:
   102         if whois_regex:
   112             whois_regex = self._regex[attr]
   103             values = []
   113         except KeyError:
   104             for value in re.findall(whois_regex, self.text, re.IGNORECASE):
       
   105                 if isinstance(value, basestring):
       
   106                     # try casting to date format
       
   107                     value = cast_date(value.strip(),
       
   108                                       dayfirst=self.dayfirst,
       
   109                                       yearfirst=self.yearfirst)
       
   110                 if value and value not in values:
       
   111                     # avoid duplicates
       
   112                     values.append(value)
       
   113             if len(values) == 1:
       
   114                 values = values[0]
       
   115             elif not values:
       
   116                 values = None
       
   117 
       
   118             setattr(self, attr, values)
       
   119             return getattr(self, attr)
       
   120         else:
       
   121             raise AttributeError('Unknown attribute: %s' % attr)
   114             raise AttributeError('Unknown attribute: %s' % attr)
       
   115         else:
       
   116             if whois_regex:
       
   117                 values = []
       
   118                 for value in re.findall(whois_regex, self.text, re.IGNORECASE):
       
   119                     if isinstance(value, basestring):
       
   120                         # try casting to date format
       
   121                         value = cast_date(value.strip(),
       
   122                                           dayfirst=self.dayfirst,
       
   123                                           yearfirst=self.yearfirst)
       
   124                     if value and value not in values:
       
   125                         # avoid duplicates
       
   126                         values.append(value)
       
   127                 if len(values) == 1:
       
   128                     values = values[0]
       
   129                 elif not values:
       
   130                     values = None
       
   131 
       
   132                 setattr(self, attr, values)
       
   133                 return getattr(self, attr)
   122 
   134 
   123     def __str__(self):
   135     def __str__(self):
   124         """Print all whois properties of domain
   136         """Print all whois properties of domain
   125         """
   137         """
   126         return '\n'.join('%s: %s' % (attr, str(getattr(self, attr)))
   138         return '\n'.join('%s: %s' % (attr, str(getattr(self, attr)))
   165             return WhoisUs(domain, text)
   177             return WhoisUs(domain, text)
   166         elif domain.endswith('.uk'):
   178         elif domain.endswith('.uk'):
   167             return WhoisUk(domain, text)
   179             return WhoisUk(domain, text)
   168         elif domain.endswith('.fr'):
   180         elif domain.endswith('.fr'):
   169             return WhoisFr(domain, text)
   181             return WhoisFr(domain, text)
       
   182         elif domain.endswith('.nl'):
       
   183             return WhoisNl(domain, text)
   170         elif domain.endswith('.fi'):
   184         elif domain.endswith('.fi'):
   171             return WhoisFi(domain, text)
   185             return WhoisFi(domain, text)
   172         elif domain.endswith('.jp'):
   186         elif domain.endswith('.jp'):
   173             return WhoisJp(domain, text)
   187             return WhoisJp(domain, text)
   174         elif domain.endswith('.pl'):
   188         elif domain.endswith('.pl'):
   181             return WhoisKr(domain, text)
   195             return WhoisKr(domain, text)
   182         elif domain.endswith('.pt'):
   196         elif domain.endswith('.pt'):
   183             return WhoisPt(domain, text)
   197             return WhoisPt(domain, text)
   184         elif domain.endswith('.bg'):
   198         elif domain.endswith('.bg'):
   185             return WhoisBg(domain, text)
   199             return WhoisBg(domain, text)
       
   200         elif domain.endswith('.de'):
       
   201             return WhoisDe(domain, text)
       
   202         elif domain.endswith('.ca'):
       
   203             return WhoisCa(domain, text)
       
   204         elif domain.endswith('.be'):
       
   205             return WhoisBe(domain, text)
   186         elif domain.endswith('.рф'):
   206         elif domain.endswith('.рф'):
   187             return WhoisRf(domain, text)
   207             return WhoisRf(domain, text)
   188         elif domain.endswith('.info'):
   208         elif domain.endswith('.info'):
   189             return WhoisInfo(domain, text)
   209             return WhoisInfo(domain, text)
   190         else:
   210         else:
   243         'creation_date': 'created:\s*(.+)',
   263         'creation_date': 'created:\s*(.+)',
   244         'expiration_date': 'paid-till:\s*(.+)',
   264         'expiration_date': 'paid-till:\s*(.+)',
   245         'name_servers': 'nserver:\s*(.+)',  # list of name servers
   265         'name_servers': 'nserver:\s*(.+)',  # list of name servers
   246         'status': 'state:\s*(.+)',  # list of statuses
   266         'status': 'state:\s*(.+)',  # list of statuses
   247         'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
   267         'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
       
   268         'org': 'org:\s*(.+)'
   248     }
   269     }
   249 
   270 
   250     def __init__(self, domain, text):
   271     def __init__(self, domain, text):
   251         if text.strip() == 'No entries found':
   272         if text.strip() == 'No entries found':
   252             raise PywhoisError(text)
   273             raise PywhoisError(text)
   253         else:
   274         else:
   254             WhoisEntry.__init__(self, domain, text, self.regex)
   275             WhoisEntry.__init__(self, domain, text, self.regex)
   255 
   276 
       
   277 
       
   278 class WhoisNl(WhoisEntry):
       
   279     """Whois parser for .nl domains
       
   280     """
       
   281     regex = {
       
   282         'name': None,
       
   283         'address': None,
       
   284         'zip_code': None,
       
   285         'city': None,
       
   286         'country': None
       
   287     }
       
   288 
       
   289     def __init__(self, domain, text):
       
   290         if text.endswith('is free'):
       
   291             raise PywhoisError(text)
       
   292         else:
       
   293             WhoisEntry.__init__(self, domain, text, self.regex)
       
   294 
       
   295         match = re.compile('Registrar:(.*?)DNSSEC', re.DOTALL).search(text)
       
   296         if match:
       
   297             lines = match.groups()[0].strip().splitlines()
       
   298             self.name = lines[0]
       
   299             self.address = lines[1]
       
   300             if len(lines) == 4:
       
   301                 self.zip_code, _, self.city = lines[2].partition(' ')
       
   302             self.country = lines[-1]
       
   303                 
   256 
   304 
   257 class WhoisName(WhoisEntry):
   305 class WhoisName(WhoisEntry):
   258     """Whois parser for .name domains
   306     """Whois parser for .name domains
   259     """
   307     """
   260     regex = {
   308     regex = {
   273         'name_servers':    'Name Server:\s*(.+)',  # list of name servers
   321         'name_servers':    'Name Server:\s*(.+)',  # list of name servers
   274         'status':          'Domain Status:\s*(.+)',  # list of statuses
   322         'status':          'Domain Status:\s*(.+)',  # list of statuses
   275     }
   323     }
   276 
   324 
   277     def __init__(self, domain, text):
   325     def __init__(self, domain, text):
   278         if 'No match.' in text:
   326         if 'No match for ' in text:
   279             raise PywhoisError(text)
   327             raise PywhoisError(text)
   280         else:
   328         else:
   281             WhoisEntry.__init__(self, domain, text, self.regex)
   329             WhoisEntry.__init__(self, domain, text, self.regex)
   282 
   330 
   283 
   331 
   371         'expiration_date':                'renewal date:\s*(.+)',
   419         'expiration_date':                'renewal date:\s*(.+)',
   372         'updated_date':                   'last modified:\s*(.+)\n',
   420         'updated_date':                   'last modified:\s*(.+)\n',
   373     }
   421     }
   374 
   422 
   375     def __init__(self, domain, text):
   423     def __init__(self, domain, text):
   376         if 'Not found:' in text:
   424         if 'No information available about domain name' in text:
       
   425             raise PywhoisError(text)
       
   426         else:
       
   427             WhoisEntry.__init__(self, domain, text, self.regex)
       
   428 
       
   429 
       
   430 class WhoisCa(WhoisEntry):
       
   431     """Whois parser for .ca domains
       
   432     """
       
   433     regex = {
       
   434         'registrant_name':                'Name:\s*(.+)',
       
   435         'registrant_number':              'Number:\s*(.+)\n',
       
   436     }
       
   437 
       
   438     def __init__(self, domain, text):
       
   439         if 'Domain status:         available' in text:
   377             raise PywhoisError(text)
   440             raise PywhoisError(text)
   378         else:
   441         else:
   379             WhoisEntry.__init__(self, domain, text, self.regex)
   442             WhoisEntry.__init__(self, domain, text, self.regex)
   380 
   443 
   381 
   444 
   467         'updated_date':                   'Last updated:\s*(.+)',
   530         'updated_date':                   'Last updated:\s*(.+)',
   468         'name_servers':                   'Name servers:\s*(.+)',
   531         'name_servers':                   'Name servers:\s*(.+)',
   469     }
   532     }
   470 
   533 
   471     def __init__(self, domain, text):
   534     def __init__(self, domain, text):
   472         if 'Not found:' in text:
   535         if 'No match for ' in text:
   473             raise PywhoisError(text)
   536             raise PywhoisError(text)
   474         else:
   537         else:
   475             WhoisEntry.__init__(self, domain, text, self.regex)
   538             WhoisEntry.__init__(self, domain, text, self.regex)
   476 
   539 
   477 
   540 
   488         'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
   551         'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
   489         'updated_date': 'last-update:\s*(.+)',
   552         'updated_date': 'last-update:\s*(.+)',
   490     }
   553     }
   491 
   554 
   492     def __init__(self, domain, text):
   555     def __init__(self, domain, text):
   493         if text.strip() == 'No entries found':
   556         if 'No entries found' in text:
   494             raise PywhoisError(text)
   557             raise PywhoisError(text)
   495         else:
   558         else:
   496             WhoisEntry.__init__(self, domain, text, self.regex)
   559             WhoisEntry.__init__(self, domain, text, self.regex)
   497 
   560 
   498 
   561 
   499 class WhoisFi(WhoisEntry):
   562 class WhoisFi(WhoisEntry):
   500     """Whois parser for .fi domains
   563     """Whois parser for .fi domains
   501     """
   564     """
   502     regex = {
   565     regex = {
   503         'domain_name':                    'domain:\s*([\S]+)',
   566         'domain_name':                    'domain:\s*([\S]+)',
   504         'registrant_name':                'descr:\s*([\S\ ]+)',
   567         'name':                           'descr:\s*([\S\ ]+)',
   505         'registrant_address':             'address:\s*([\S\ ]+)',
   568         'address':                        'address:\s*([\S\ ]+)',
   506         'registrant_phone':               'phone:\s*([\S\ ]+)',
   569         'phone':                          'phone:\s*([\S\ ]+)',
   507         'status':                         'status:\s*([\S]+)',  # list of statuses
   570         'status':                         'status:\s*([\S]+)',  # list of statuses
   508         'creation_date':                  'created:\s*([\S]+)',
   571         'creation_date':                  'created:\s*([\S]+)',
   509         'updated_date':                   'modified:\s*([\S]+)',
   572         'updated_date':                   'modified:\s*([\S]+)',
   510         'expiration_date':                'expires:\s*([\S]+)',
   573         'expiration_date':                'expires:\s*([\S]+)',
   511         'name_servers':                   'nserver:\s*([\S]+) \[\S+\]',  # list of name servers
   574         'name_servers':                   'nserver:\s*([\S]+) \[\S+\]',  # list of name servers
   531         'updated_date':  '\[Last Update\]\s?(.+)',
   594         'updated_date':  '\[Last Update\]\s?(.+)',
   532         'status': '\[State\]\s*(.+)',  # list of statuses
   595         'status': '\[State\]\s*(.+)',  # list of statuses
   533     }
   596     }
   534 
   597 
   535     def __init__(self, domain, text):
   598     def __init__(self, domain, text):
   536         if text.strip() == 'No entries found':
   599         if 'No match!!' in text:
   537             raise PywhoisError(text)
   600             raise PywhoisError(text)
   538         else:
   601         else:
   539             WhoisEntry.__init__(self, domain, text, self.regex)
   602             WhoisEntry.__init__(self, domain, text, self.regex)
   540 
   603 
   541 
   604 
   629         'registrar':  'Authorized Agency\s*:\s*(.+)',
   692         'registrar':  'Authorized Agency\s*:\s*(.+)',
   630         'name_servers': 'Host Name\s*:\s*(.+)',  # list of name servers
   693         'name_servers': 'Host Name\s*:\s*(.+)',  # list of name servers
   631     }
   694     }
   632 
   695 
   633     def __init__(self, domain, text):
   696     def __init__(self, domain, text):
   634         if text.strip() == 'No entries found':
   697         if text.endswith(' no match'):
   635             raise PywhoisError(text)
   698             raise PywhoisError(text)
   636         else:
   699         else:
   637             WhoisEntry.__init__(self, domain, text, self.regex)
   700             WhoisEntry.__init__(self, domain, text, self.regex)
   638 
   701 
   639 
   702 
   664     }
   727     }
   665 
   728 
   666     dayfirst = True
   729     dayfirst = True
   667 
   730 
   668     def __init__(self, domain, text):
   731     def __init__(self, domain, text):
       
   732         if 'does not exist in database!' in text:
       
   733             raise PywhoisError(text)
       
   734         else:
       
   735             WhoisEntry.__init__(self, domain, text, self.regex)
       
   736 
       
   737 
       
   738 class WhoisRf(WhoisEntry):
       
   739     """Whois parser for .рф domains"""
       
   740 
       
   741     regex = {
       
   742         'expiration_date': 'free-date:\s*(.+)',
       
   743     }
       
   744 
       
   745     def __init__(self, domain, text):
   669         if text.strip() == 'No entries found':
   746         if text.strip() == 'No entries found':
   670             raise PywhoisError(text)
   747             raise PywhoisError(text)
   671         else:
   748         else:
   672             WhoisEntry.__init__(self, domain, text, self.regex)
   749             WhoisEntry.__init__(self, domain, text, self.regex)
   673 
   750 
   674 
   751 
   675 class WhoisRf(WhoisEntry):
   752 class WhoisDe(WhoisEntry):
   676     """Whois parser for .bg domains"""
   753     """Whois parser for .de domains"""
   677 
   754 
   678     regex = {
   755     regex = {
   679         'expiration_date': 'free-date:\s*(.+)',
   756         'name': 'name:\s*(.+)',
   680     }
   757         'org': 'Organisation:\s*(.+)',
   681 
   758         'address': 'Address:\s*(.+)',
   682     def __init__(self, domain, text):
   759         'zipcode': 'PostalCode:\s*(.+)',
   683         if text.strip() == 'No entries found':
   760         'city': 'City:\s*(.+)',
   684             raise PywhoisError(text)
   761         'country_code': 'CountryCode:\s*(.+)',
   685         else:
   762         'phone': 'Phone:\s*(.+)',
   686             WhoisEntry.__init__(self, domain, text, self.regex)
   763         'fax': 'Fax:\s*(.+)'
       
   764     }
       
   765 
       
   766     def __init__(self, domain, text):
       
   767         if 'Status: free' in text:
       
   768             raise PywhoisError(text)
       
   769         else:
       
   770             WhoisEntry.__init__(self, domain, text, self.regex)
       
   771 
       
   772 
       
   773 class WhoisBe(WhoisEntry):
       
   774     """Whois parser for .be domains"""
       
   775 
       
   776     regex = {
       
   777         'name': 'Name:\s*(.+)',
       
   778         'org': 'Organisation:\s*(.+)',
       
   779         'phone': 'Phone:\s*(.+)',
       
   780         'fax': 'Fax:\s*(.+)',
       
   781         'email': 'Email:\s*(.+)',
       
   782     }
       
   783 
       
   784     def __init__(self, domain, text):
       
   785         if 'Status: AVAILABLE' in text:
       
   786             raise PywhoisError(text)
       
   787         else:
       
   788             WhoisEntry.__init__(self, domain, text, self.regex)
       
   789 
   687 
   790 
   688 
   791 
   689 class WhoisInfo(WhoisEntry):
   792 class WhoisInfo(WhoisEntry):
   690     """Whois parser for .info domains
   793     """Whois parser for .info domains
   691     """
   794     """
   698         'creation_date':    'Creation Date:\s?(.+)',
   801         'creation_date':    'Creation Date:\s?(.+)',
   699         'expiration_date':  'Registry Expiry Date:\s?(.+)',
   802         'expiration_date':  'Registry Expiry Date:\s?(.+)',
   700         'name_servers':     'Name Server:\s?(.+)', # list of name servers
   803         'name_servers':     'Name Server:\s?(.+)', # list of name servers
   701         'status':           'Status:\s?(.+)', # list of statuses
   804         'status':           'Status:\s?(.+)', # list of statuses
   702         'emails':           '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses
   805         'emails':           '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses
       
   806         'name':             'Registrant Name:\s*(.+)',
       
   807         'org':              'Registrant Organization:\s*(.+)',
       
   808         'address':          'Registrant Street:\s*(.+)',
       
   809         'city':             'Registrant City:\s*(.+)',
       
   810         'state':            'Registrant State/Province:\s*(.+)',
       
   811         'zipcode':          'Registrant Postal Code:\s*(.+)',
       
   812         'country':          'Registrant Country:\s*(.+)',
   703     }
   813     }
   704 
   814 
   705     def __init__(self, domain, text):
   815     def __init__(self, domain, text):
   706         if text.strip() == 'NOT FOUND':
   816         if text.strip() == 'NOT FOUND':
   707             raise PywhoisError(text)
   817             raise PywhoisError(text)