test/test_parser.py
changeset 135 808c8bc803f5
parent 102 e8cb8d1367c0
child 149 3aff6a7772b3
child 156 9437303d43e8
equal deleted inserted replaced
134:3ff7f09ea24a 135:808c8bc803f5
    11 import sys
    11 import sys
    12 sys.path.append('../')
    12 sys.path.append('../')
    13 
    13 
    14 import datetime
    14 import datetime
    15 
    15 
    16 import simplejson
    16 try:
       
    17     import json
       
    18 except:
       
    19     import simplejson as json
    17 from glob import glob
    20 from glob import glob
    18 
    21 
    19 from whois.parser import WhoisEntry, cast_date
    22 from whois.parser import WhoisEntry, cast_date
    20 
    23 
    21 class TestParser(unittest.TestCase):
    24 class TestParser(unittest.TestCase):
    23         data = """
    26         data = """
    24         Status: ok
    27         Status: ok
    25         Updated Date: 14-apr-2008
    28         Updated Date: 14-apr-2008
    26         Creation Date: 14-apr-2008
    29         Creation Date: 14-apr-2008
    27         Expiration Date: 14-apr-2009
    30         Expiration Date: 14-apr-2009
    28         
    31 
    29         >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<<
    32         >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<<
    30         """
    33         """
    31         w = WhoisEntry.load('urlowl.com', data)
    34         w = WhoisEntry.load('urlowl.com', data)
    32         expires = w.expiration_date.strftime('%Y-%m-%d')
    35         expires = w.expiration_date.strftime('%Y-%m-%d')
    33         self.assertEqual(expires, '2009-04-14')
    36         self.assertEqual(expires, '2009-04-14')
    41     def test_com_allsamples(self):
    44     def test_com_allsamples(self):
    42         """
    45         """
    43         Iterate over all of the sample/whois/*.com files, read the data,
    46         Iterate over all of the sample/whois/*.com files, read the data,
    44         parse it, and compare to the expected values in sample/expected/.
    47         parse it, and compare to the expected values in sample/expected/.
    45         Only keys defined in keys_to_test will be tested.
    48         Only keys defined in keys_to_test will be tested.
    46         
    49 
    47         To generate fresh expected value dumps, see NOTE below.
    50         To generate fresh expected value dumps, see NOTE below.
    48         """
    51         """
    49         keys_to_test = ['domain_name', 'expiration_date', 'updated_date',
    52         keys_to_test = ['domain_name', 'expiration_date', 'updated_date',
    50                         'creation_date', 'status']
    53                         'creation_date', 'status']
    51         fail = 0
    54         fail = 0
    53         for path in glob('test/samples/whois/*.com'):
    56         for path in glob('test/samples/whois/*.com'):
    54             # Parse whois data
    57             # Parse whois data
    55             domain = os.path.basename(path)
    58             domain = os.path.basename(path)
    56             with open(path) as whois_fp:
    59             with open(path) as whois_fp:
    57                 data = whois_fp.read()
    60                 data = whois_fp.read()
    58             
    61 
    59             w = WhoisEntry.load(domain, data)
    62             w = WhoisEntry.load(domain, data)
    60             results = {key: w.get(key) for key in keys_to_test}
    63             results = {key: w.get(key) for key in keys_to_test}
    61 
    64 
    62             # NOTE: Toggle condition below to write expected results from the
    65             # NOTE: Toggle condition below to write expected results from the
    63             # parse results This will overwrite the existing expected results.
    66             # parse results This will overwrite the existing expected results.
    69                         return str(obj)
    72                         return str(obj)
    70                     raise TypeError(
    73                     raise TypeError(
    71                             '{} is not JSON serializable'.format(repr(obj)))
    74                             '{} is not JSON serializable'.format(repr(obj)))
    72                 outfile_name = os.path.join('test/samples/expected/', domain)
    75                 outfile_name = os.path.join('test/samples/expected/', domain)
    73                 with open(outfile_name, 'w') as outfil:
    76                 with open(outfile_name, 'w') as outfil:
    74                     expected_results = simplejson.dump(results, outfil,
    77                     expected_results = json.dump(results, outfil,
    75                                                        default=date2str4json)
    78                                                        default=date2str4json)
    76                 continue
    79                 continue
    77 
    80 
    78             # Load expected result
    81             # Load expected result
    79             with open(os.path.join('test/samples/expected/', domain)) as infil:
    82             with open(os.path.join('test/samples/expected/', domain)) as infil:
    80                 expected_results = simplejson.load(infil)
    83                 expected_results = json.load(infil)
    81             
    84 
    82             # Compare each key
    85             # Compare each key
    83             for key in results:
    86             for key in results:
    84                 total += 1
    87                 total += 1
    85                 result = results.get(key)
    88                 result = results.get(key)
    86                 if isinstance(result, datetime.datetime):
    89                 if isinstance(result, datetime.datetime):
    87                     result = str(result)
    90                     result = str(result)
    88                 expected = expected_results.get(key)
    91                 expected = expected_results.get(key)
    89                 if expected != result:
    92                 if expected != result:
    90                     print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
    93                     print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
    91                     fail += 1
    94                     fail += 1
    92             
    95 
    93         if fail:
    96         if fail:
    94             self.fail("%d/%d sample whois attributes were not parsed properly!"
    97             self.fail("%d/%d sample whois attributes were not parsed properly!"
    95                       % (fail, total))
    98                       % (fail, total))
    96 
    99 
    97 
   100 
   129 
   132 
   130         Name servers:
   133         Name servers:
   131             ns1.testserver1.net
   134             ns1.testserver1.net
   132             ns2.testserver2.net
   135             ns2.testserver2.net
   133         """
   136         """
   134         results = WhoisEntry.load('testcompany.ca', data)
       
   135         expected_results = {
   137         expected_results = {
   136             "updated_date": "2016-04-29 00:00:00", 
   138             "updated_date": "2016-04-29 00:00:00",
   137             "registrant_name": [
   139             "registrant_name": [
   138                 "Webnames.ca Inc.", 
   140                 "Webnames.ca Inc.",
   139                 "Test Industries", 
   141                 "Test Industries",
   140                 "Test Person1", 
   142                 "Test Person1",
   141                 "Test Persion2"
   143                 "Test Persion2"
   142             ], 
   144             ],
   143             "fax": [
   145             "fax": [
   144                 "+1.123434123", 
   146                 "+1.123434123",
   145                 "+1.12312993873"
   147                 "+1.12312993873"
   146             ], 
   148             ],
   147             "dnssec": "Unsigned", 
   149             "dnssec": "Unsigned",
   148             "registrant_number": "70", 
   150             "registrant_number": "70",
   149             "expiration_date": "2020-03-08 00:00:00", 
   151             "expiration_date": "2020-03-08 00:00:00",
   150             "domain_name": "testdomain.ca", 
   152             "domain_name": "testdomain.ca",
   151             "creation_date": "2000-11-20 00:00:00", 
   153             "creation_date": "2000-11-20 00:00:00",
   152             "phone": [
   154             "phone": [
   153                 "+1.1235434123x123", 
   155                 "+1.1235434123x123",
   154                 "+1.09876545123"
   156                 "+1.09876545123"
   155             ], 
   157             ],
   156             "domain_status": "registered", 
   158             "domain_status": "registered",
   157             "emails": [
   159             "emails": [
   158                 "testperson1@testcompany.ca", 
   160                 "testperson1@testcompany.ca",
   159                 "testpersion2@testcompany.ca"
   161                 "testpersion2@testcompany.ca"
   160             ]
   162             ]
   161         }
   163         }
   162         
   164         self._parse_and_compare('testcompany.ca', data, expected_results)
       
   165 
       
   166     def test_il_parse(self):
       
   167         data = """
       
   168             query:        python.org.il
       
   169 
       
   170             reg-name:     python
       
   171             domain:       python.org.il
       
   172 
       
   173             descr:        Arik Baratz
       
   174             descr:        PO Box 7775 PMB 8452
       
   175             descr:        San Francisco, CA
       
   176             descr:        94120
       
   177             descr:        USA
       
   178             phone:        +1 650 6441973
       
   179             e-mail:       hostmaster AT arik.baratz.org
       
   180             admin-c:      LD-AB16063-IL
       
   181             tech-c:       LD-AB16063-IL
       
   182             zone-c:       LD-AB16063-IL
       
   183             nserver:      dns1.zoneedit.com
       
   184             nserver:      dns2.zoneedit.com
       
   185             nserver:      dns3.zoneedit.com
       
   186             validity:     10-05-2018
       
   187             DNSSEC:       unsigned
       
   188             status:       Transfer Locked
       
   189             changed:      domain-registrar AT isoc.org.il 20050524 (Assigned)
       
   190             changed:      domain-registrar AT isoc.org.il 20070520 (Transferred)
       
   191             changed:      domain-registrar AT isoc.org.il 20070520 (Changed)
       
   192             changed:      domain-registrar AT isoc.org.il 20070520 (Changed)
       
   193             changed:      domain-registrar AT isoc.org.il 20070807 (Changed)
       
   194             changed:      domain-registrar AT isoc.org.il 20071025 (Changed)
       
   195             changed:      domain-registrar AT isoc.org.il 20071025 (Changed)
       
   196             changed:      domain-registrar AT isoc.org.il 20081221 (Changed)
       
   197             changed:      domain-registrar AT isoc.org.il 20081221 (Changed)
       
   198             changed:      domain-registrar AT isoc.org.il 20160301 (Changed)
       
   199             changed:      domain-registrar AT isoc.org.il 20160301 (Changed)
       
   200 
       
   201             person:       Arik Baratz
       
   202             address:      PO Box 7775 PMB 8452
       
   203             address:      San Francisco, CA
       
   204             address:      94120
       
   205             address:      USA
       
   206             phone:        +1 650 9635533
       
   207             e-mail:       hostmaster AT arik.baratz.org
       
   208             nic-hdl:      LD-AB16063-IL
       
   209             changed:      Managing Registrar 20070514
       
   210             changed:      Managing Registrar 20081002
       
   211             changed:      Managing Registrar 20081221
       
   212             changed:      Managing Registrar 20081221
       
   213             changed:      Managing Registrar 20090502
       
   214 
       
   215             registrar name: LiveDns Ltd
       
   216             registrar info: http://domains.livedns.co.il
       
   217         """
       
   218         expected_results = {
       
   219             "updated_date": None,
       
   220             "registrant_name": "Arik Baratz",
       
   221             "fax": None,
       
   222             "dnssec": "unsigned",
       
   223             "expiration_date": "2018-05-10 00:00:00",
       
   224             "domain_name": "python.org.il",
       
   225             "creation_date": None,
       
   226             "phone": ['+1 650 6441973', '+1 650 9635533'],
       
   227             "status": "Transfer Locked",
       
   228             "emails": "hostmaster@arik.baratz.org",
       
   229             "name_servers": ["dns1.zoneedit.com", "dns2.zoneedit.com", "dns3.zoneedit.com"],
       
   230             "registrar": "LiveDns Ltd",
       
   231             "referral_url": "http://domains.livedns.co.il"
       
   232         }
       
   233         self._parse_and_compare('python.org.il', data, expected_results)
       
   234 
       
   235     def _parse_and_compare(self, domain_name, data, expected_results):
       
   236         results = WhoisEntry.load(domain_name, data)
   163         fail = 0
   237         fail = 0
   164         total = 0
   238         total = 0
   165 
       
   166         # Compare each key
   239         # Compare each key
   167         for key in expected_results:
   240         for key in expected_results:
   168             total += 1
   241             total += 1
   169             result = results.get(key)
   242             result = results.get(key)
   170             if isinstance(result, datetime.datetime):
   243             if isinstance(result, datetime.datetime):
   171                 result = str(result)
   244                 result = str(result)
   172             expected = expected_results.get(key)
   245             expected = expected_results.get(key)
   173             if expected != result:
   246             if expected != result:
   174                 print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
   247                 print("%s \t(%s):\t %s != %s" % (domain_name, key, result, expected))
   175                 fail += 1
   248                 fail += 1
   176         if fail:
   249         if fail:
   177             self.fail("%d/%d sample whois attributes were not parsed properly!"
   250             self.fail("%d/%d sample whois attributes were not parsed properly!"
   178                       % (fail, total))
   251                       % (fail, total))
   179 
   252 
   180 
   253 
   181 
   254 
   182         
       
   183 
       
   184 if __name__ == '__main__':
   255 if __name__ == '__main__':
   185     unittest.main()
   256     unittest.main()