# HG changeset patch # User Richard Penman # Date 1464901521 25200 # Node ID 3202436d89d0e0a03b93fca8e9db1f0826f80237 # Parent acdc2cb09f6001c32789a629a856fef1ef16d6c8# Parent 44522cd37b07ce0e795500d50d8f61c0418b81cd Merged in mariosantana/pywhois (pull request #7) Regression in queries from python3 diff -r acdc2cb09f60 -r 3202436d89d0 .hgignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgignore Thu Jun 02 14:05:21 2016 -0700 @@ -0,0 +1,4 @@ +^.eggs$ +^python_whois.egg-info$ +\.pyc$ +\.swp$ diff -r acdc2cb09f60 -r 3202436d89d0 setup.py --- a/setup.py Thu Jun 02 13:59:46 2016 -0700 +++ b/setup.py Thu Jun 02 14:05:21 2016 -0700 @@ -29,6 +29,8 @@ extras_require={ 'better date conversion': ["python-dateutil"] }, + test_suite='nose.collector', + tests_require=['nose', 'simplejson'], include_package_data=True, zip_safe=False ) diff -r acdc2cb09f60 -r 3202436d89d0 test/samples/expected/digg.com --- a/test/samples/expected/digg.com Thu Jun 02 13:59:46 2016 -0700 +++ b/test/samples/expected/digg.com Thu Jun 02 14:05:21 2016 -0700 @@ -1,1 +1,1 @@ -{"updated_date": ["13-mar-2007"], "expiration_date": ["20-feb-2010"], "status": ["clientDeleteProhibited", "clientRenewProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["DIGG.COM", "DIGG.COM"], "creation_date": ["20-feb-2000"]} \ No newline at end of file +{"domain_name": "DIGG.COM", "expiration_date": "2010-02-20 00:00:00", "updated_date": "2007-03-13 00:00:00", "status": ["clientDeleteProhibited", "clientRenewProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "2000-02-20 00:00:00"} \ No newline at end of file diff -r acdc2cb09f60 -r 3202436d89d0 test/samples/expected/google.com --- a/test/samples/expected/google.com Thu Jun 02 13:59:46 2016 -0700 +++ b/test/samples/expected/google.com Thu Jun 02 14:05:21 2016 -0700 @@ -1,1 +1,1 @@ -{"updated_date": ["10-apr-2006"], "expiration_date": ["14-sep-2011"], "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["GOOGLE.COM", "google.com"], "creation_date": ["15-sep-1997"]} \ No newline at end of file +{"domain_name": ["GOOGLE.COM", "google.com"], "expiration_date": "2011-09-14 00:00:00", "updated_date": "2006-04-10 00:00:00", "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "1997-09-15 00:00:00"} \ No newline at end of file diff -r acdc2cb09f60 -r 3202436d89d0 test/samples/expected/imdb.com --- a/test/samples/expected/imdb.com Thu Jun 02 13:59:46 2016 -0700 +++ b/test/samples/expected/imdb.com Thu Jun 02 14:05:21 2016 -0700 @@ -1,1 +1,1 @@ -{"updated_date": ["28-mar-2008"], "expiration_date": ["04-jan-2016"], "status": ["clientTransferProhibited"], "domain_name": ["IMDB.COM", "IMDB.COM"], "creation_date": ["05-jan-1996"]} \ No newline at end of file +{"domain_name": "IMDB.COM", "expiration_date": "2016-01-04 00:00:00", "updated_date": "2008-03-28 00:00:00", "status": "clientTransferProhibited", "creation_date": "1996-01-05 00:00:00"} \ No newline at end of file diff -r acdc2cb09f60 -r 3202436d89d0 test/samples/expected/microsoft.com --- a/test/samples/expected/microsoft.com Thu Jun 02 13:59:46 2016 -0700 +++ b/test/samples/expected/microsoft.com Thu Jun 02 14:05:21 2016 -0700 @@ -1,1 +1,1 @@ -{"updated_date": ["10-oct-2006"], "expiration_date": ["03-may-2014"], "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["MICROSOFT.COM"], "creation_date": ["02-may-1991"]} \ No newline at end of file +{"domain_name": "MICROSOFT.COM", "expiration_date": "2014-05-03 00:00:00", "updated_date": "2006-10-10 00:00:00", "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "1991-05-02 00:00:00"} \ No newline at end of file diff -r acdc2cb09f60 -r 3202436d89d0 test/samples/expected/reddit.com --- a/test/samples/expected/reddit.com Thu Jun 02 13:59:46 2016 -0700 +++ b/test/samples/expected/reddit.com Thu Jun 02 14:05:21 2016 -0700 @@ -1,1 +1,1 @@ -{"updated_date": ["04-jun-2008"], "expiration_date": ["29-apr-2009"], "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["REDDIT.COM", "REDDIT.COM"], "creation_date": ["29-apr-2005"]} \ No newline at end of file +{"domain_name": "REDDIT.COM", "expiration_date": "2009-04-29 00:00:00", "updated_date": "2008-06-04 00:00:00", "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "2005-04-29 00:00:00"} \ No newline at end of file diff -r acdc2cb09f60 -r 3202436d89d0 test/samples/expected/urlowl.com --- a/test/samples/expected/urlowl.com Thu Jun 02 13:59:46 2016 -0700 +++ b/test/samples/expected/urlowl.com Thu Jun 02 14:05:21 2016 -0700 @@ -1,1 +1,1 @@ -{"updated_date": ["14-apr-2008"], "expiration_date": ["14-apr-2009"], "status": ["ok"], "domain_name": ["URLOWL.COM", "urlowl.com"], "creation_date": ["14-apr-2008"]} \ No newline at end of file +{"domain_name": ["URLOWL.COM", "urlowl.com"], "expiration_date": "2009-04-14 00:00:00", "updated_date": "2008-04-14 00:00:00", "status": "ok", "creation_date": "2008-04-14 00:00:00"} \ No newline at end of file diff -r acdc2cb09f60 -r 3202436d89d0 test/test_main.py --- a/test/test_main.py Thu Jun 02 13:59:46 2016 -0700 +++ b/test/test_main.py Thu Jun 02 14:05:21 2016 -0700 @@ -19,7 +19,7 @@ def test_ascii_with_schema_path_and_query(self): url = 'https://www.google.com/search?q=why+is+domain+whois+such+a+mess' - domain = 'google.com' + domain = 'www.google.com' self.assertEqual(domain, extract_domain(url)) def test_simple_unicode_domain(self): diff -r acdc2cb09f60 -r 3202436d89d0 test/test_nicclient.py --- a/test/test_nicclient.py Thu Jun 02 13:59:46 2016 -0700 +++ b/test/test_nicclient.py Thu Jun 02 14:05:21 2016 -0700 @@ -18,7 +18,6 @@ def test_choose_server(self): domain = 'рнидс.срб' chosen = self.client.choose_server(domain) - self.assertEqual( - chosen, - 'срб'.decode('utf-8').encode('idna') + '.whois-servers.net' - ) + suffix = domain.split('.')[-1].encode('idna').decode('utf-8') + correct = '{}.whois-servers.net'.format(suffix) + self.assertEqual(chosen, correct) diff -r acdc2cb09f60 -r 3202436d89d0 test/test_parser.py --- a/test/test_parser.py Thu Jun 02 13:59:46 2016 -0700 +++ b/test/test_parser.py Thu Jun 02 14:05:21 2016 -0700 @@ -21,22 +21,22 @@ class TestParser(unittest.TestCase): def test_com_expiration(self): data = """ - Status: ok - Updated Date: 14-apr-2008 - Creation Date: 14-apr-2008 - Expiration Date: 14-apr-2009 - - >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<< + Status: ok + Updated Date: 14-apr-2008 + Creation Date: 14-apr-2008 + Expiration Date: 14-apr-2009 + + >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<< """ w = WhoisEntry.load('urlowl.com', data) expires = w.expiration_date.strftime('%Y-%m-%d') - self.assertEquals(expires, '2009-04-14') + self.assertEqual(expires, '2009-04-14') def test_cast_date(self): dates = ['14-apr-2008', '2008-04-14'] for d in dates: r = cast_date(d).strftime('%Y-%m-%d') - self.assertEquals(r, '2008-04-14') + self.assertEqual(r, '2008-04-14') def test_com_allsamples(self): """ @@ -46,41 +46,53 @@ To generate fresh expected value dumps, see NOTE below. """ - keys_to_test = ['domain_name', 'expiration_date', 'updated_date', 'creation_date', 'status'] + keys_to_test = ['domain_name', 'expiration_date', 'updated_date', + 'creation_date', 'status'] fail = 0 + total = 0 for path in glob('test/samples/whois/*.com'): # Parse whois data domain = os.path.basename(path) - whois_fp = open(path) - data = whois_fp.read() + with open(path) as whois_fp: + data = whois_fp.read() w = WhoisEntry.load(domain, data) - results = {} - for key in keys_to_test: - results[key] = w.get(key) + results = {key: w.get(key) for key in keys_to_test} + + # NOTE: Toggle condition below to write expected results from the + # parse results This will overwrite the existing expected results. + # Only do this if you've manually confirmed that the parser is + # generating correct values at its current state. + if False: + def date2str4json(obj): + if isinstance(obj, datetime.datetime): + return str(obj) + raise TypeError( + '{} is not JSON serializable'.format(repr(obj))) + outfile_name = os.path.join('test/samples/expected/', domain) + with open(outfile_name, 'w') as outfil: + expected_results = simplejson.dump(results, outfil, + default=date2str4json) + continue # Load expected result - expected_fp = open(os.path.join('test/samples/expected/', domain)) - expected_results = simplejson.load(expected_fp) - - # NOTE: Toggle condition below to write expected results from the parse results - # This will overwrite the existing expected results. Only do this if you've manually - # confirmed that the parser is generating correct values at its current state. - if False: - expected_fp = open(os.path.join('test/samples/expected/', domain), 'w') - expected_results = simplejson.dump(results, expected_fp) - continue + with open(os.path.join('test/samples/expected/', domain)) as infil: + expected_results = simplejson.load(infil) # Compare each key for key in results: + total += 1 result = results.get(key) + if isinstance(result, datetime.datetime): + result = str(result) expected = expected_results.get(key) if expected != result: print("%s \t(%s):\t %s != %s" % (domain, key, result, expected)) fail += 1 if fail: - self.fail("%d sample whois attributes were not parsed properly!" % fail) + self.fail("%d/%d sample whois attributes were not parsed properly!" + % (fail, total)) if __name__ == '__main__': diff -r acdc2cb09f60 -r 3202436d89d0 test/test_query.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/test_query.py Thu Jun 02 14:05:21 2016 -0700 @@ -0,0 +1,25 @@ +# coding=utf-8 + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * +import unittest +from whois import whois + + +class TestQuery(unittest.TestCase): + def test_simple_ascii_domain(self): + domain = 'google.com' + whois(domain) + + def test_simple_unicode_domain(self): + domain = 'нарояци.com' + whois(domain) + + def test_unicode_domain_and_tld(self): + domain = 'россия.рф' + whois(domain) diff -r acdc2cb09f60 -r 3202436d89d0 whois/__init__.py --- a/whois/__init__.py Thu Jun 02 13:59:46 2016 -0700 +++ b/whois/__init__.py Thu Jun 02 14:05:21 2016 -0700 @@ -57,17 +57,18 @@ return socket.gethostbyaddr(url)[0] tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt') - suffixes = [ - line.lower().strip().encode('utf-8') - for line in open(tlds_path).readlines() - if not line.startswith('#') - ] + with open(tlds_path) as tlds_fil: + suffixes = [line.lower().encode('utf-8') + for line in (x.strip() for x in tlds_fil) + if not line.startswith('#')] + suff = 'xn--p1ai' if not isinstance(url, str): url = url.decode('utf-8') - url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower() + url = re.sub('^.*://', '', url) + url = url.split('/')[0].lower().encode('idna') + domain = [] - for section in url.split(b'.'): if section in suffixes: domain.append(section) diff -r acdc2cb09f60 -r 3202436d89d0 whois/whois.py --- a/whois/whois.py Thu Jun 02 13:59:46 2016 -0700 +++ b/whois/whois.py Thu Jun 02 14:05:21 2016 -0700 @@ -97,8 +97,12 @@ s.connect((hostname, 43)) # end takes bytes as an input queryBytes = None - if type(query) is not str: + try: query = query.decode('utf-8') + except UnicodeEncodeError: + pass # Already Unicode (python2's error) + except AttributeError: + pass # Already Unicode (python3's error) if hostname == NICClient.DENICHOST: queryBytes = "-T dn,ace -C UTF-8 " + query @@ -132,14 +136,16 @@ def choose_server(self, domain): """Choose initial lookup NIC host""" - if type(domain) is not str: + try: + domain = domain.encode('idna').decode('utf-8') + except TypeError: domain = domain.decode('utf-8').encode('idna').decode('utf-8') if domain.endswith("-NORID"): return NICClient.NORIDHOST - pos = domain.rfind('.') - if pos == -1: + domain = domain.split('.') + if len(domain) < 2: return None - tld = domain[pos+1:] + tld = domain[-1] if tld[0].isdigit(): return NICClient.ANICHOST return tld + NICClient.QNICHOST_TAIL