Merged in mariosantana/pywhois (pull request #7)
author Richard Penman <richardbp@gmail.com>
date Thu, 02 Jun 2016 14:05:21 -0700
changeset 98 3202436d89d0
parent 93 acdc2cb09f60 (current diff)
parent 97 44522cd37b07 (diff)
child 99 67b90bfc59c7
child 100 b5699d950712
Merged in mariosantana/pywhois (pull request #7) Regression in queries from python3
whois/__init__.py
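
The "regression in queries from python3" comes down to the bytes/str split: the Python 2 code paths assumed a domain could be decoded from UTF-8 and then IDNA-encoded in one chain, which breaks once the value is already a str. A minimal sketch of the difference, illustrative only and not part of this changeset (Python 3 shown; the commented chain is the old Python 2 path):

    # Python 2 style: value arrives as UTF-8 bytes, decode first, then IDNA-encode
    #   'срб'.decode('utf-8').encode('idna')          -> 'xn--90a3ac'
    # Python 3: the value is already str and has no .decode(); encode directly
    suffix = 'срб'.encode('idna').decode('utf-8')      # 'xn--90a3ac'
    print(suffix + '.whois-servers.net')               # xn--90a3ac.whois-servers.net
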
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Thu Jun 02 14:05:21 2016 -0700
@@ -0,0 +1,4 @@
+^.eggs$
+^python_whois.egg-info$
+\.pyc$
+\.swp$
--- a/setup.py	Thu Jun 02 13:59:46 2016 -0700
+++ b/setup.py	Thu Jun 02 14:05:21 2016 -0700
@@ -29,6 +29,8 @@
     extras_require={
         'better date conversion': ["python-dateutil"]
     },
+    test_suite='nose.collector',
+    tests_require=['nose', 'simplejson'],
     include_package_data=True,
     zip_safe=False
 )
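
With test_suite='nose.collector' declared, the whole test/ tree can be driven through setuptools, and tests_require lets it fetch nose and simplejson locally (the new .eggs ignore entry above covers the directory setuptools uses for those eggs). A typical invocation, assuming a standard setuptools environment:

    python setup.py test    # collects and runs the suite via nose.collector
    nosetests test/         # or run nose directly against the test directory
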
--- a/test/samples/expected/digg.com	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/samples/expected/digg.com	Thu Jun 02 14:05:21 2016 -0700
@@ -1,1 +1,1 @@
-{"updated_date": ["13-mar-2007"], "expiration_date": ["20-feb-2010"], "status": ["clientDeleteProhibited", "clientRenewProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["DIGG.COM", "DIGG.COM"], "creation_date": ["20-feb-2000"]}
\ No newline at end of file
+{"domain_name": "DIGG.COM", "expiration_date": "2010-02-20 00:00:00", "updated_date": "2007-03-13 00:00:00", "status": ["clientDeleteProhibited", "clientRenewProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "2000-02-20 00:00:00"}
\ No newline at end of file
--- a/test/samples/expected/google.com	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/samples/expected/google.com	Thu Jun 02 14:05:21 2016 -0700
@@ -1,1 +1,1 @@
-{"updated_date": ["10-apr-2006"], "expiration_date": ["14-sep-2011"], "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["GOOGLE.COM", "google.com"], "creation_date": ["15-sep-1997"]}
\ No newline at end of file
+{"domain_name": ["GOOGLE.COM", "google.com"], "expiration_date": "2011-09-14 00:00:00", "updated_date": "2006-04-10 00:00:00", "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "1997-09-15 00:00:00"}
\ No newline at end of file
--- a/test/samples/expected/imdb.com	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/samples/expected/imdb.com	Thu Jun 02 14:05:21 2016 -0700
@@ -1,1 +1,1 @@
-{"updated_date": ["28-mar-2008"], "expiration_date": ["04-jan-2016"], "status": ["clientTransferProhibited"], "domain_name": ["IMDB.COM", "IMDB.COM"], "creation_date": ["05-jan-1996"]}
\ No newline at end of file
+{"domain_name": "IMDB.COM", "expiration_date": "2016-01-04 00:00:00", "updated_date": "2008-03-28 00:00:00", "status": "clientTransferProhibited", "creation_date": "1996-01-05 00:00:00"}
\ No newline at end of file
--- a/test/samples/expected/microsoft.com	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/samples/expected/microsoft.com	Thu Jun 02 14:05:21 2016 -0700
@@ -1,1 +1,1 @@
-{"updated_date": ["10-oct-2006"], "expiration_date": ["03-may-2014"], "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["MICROSOFT.COM"], "creation_date": ["02-may-1991"]}
\ No newline at end of file
+{"domain_name": "MICROSOFT.COM", "expiration_date": "2014-05-03 00:00:00", "updated_date": "2006-10-10 00:00:00", "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "1991-05-02 00:00:00"}
\ No newline at end of file
--- a/test/samples/expected/reddit.com	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/samples/expected/reddit.com	Thu Jun 02 14:05:21 2016 -0700
@@ -1,1 +1,1 @@
-{"updated_date": ["04-jun-2008"], "expiration_date": ["29-apr-2009"], "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "domain_name": ["REDDIT.COM", "REDDIT.COM"], "creation_date": ["29-apr-2005"]}
\ No newline at end of file
+{"domain_name": "REDDIT.COM", "expiration_date": "2009-04-29 00:00:00", "updated_date": "2008-06-04 00:00:00", "status": ["clientDeleteProhibited", "clientTransferProhibited", "clientUpdateProhibited"], "creation_date": "2005-04-29 00:00:00"}
\ No newline at end of file
--- a/test/samples/expected/urlowl.com	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/samples/expected/urlowl.com	Thu Jun 02 14:05:21 2016 -0700
@@ -1,1 +1,1 @@
-{"updated_date": ["14-apr-2008"], "expiration_date": ["14-apr-2009"], "status": ["ok"], "domain_name": ["URLOWL.COM", "urlowl.com"], "creation_date": ["14-apr-2008"]}
\ No newline at end of file
+{"domain_name": ["URLOWL.COM", "urlowl.com"], "expiration_date": "2009-04-14 00:00:00", "updated_date": "2008-04-14 00:00:00", "status": "ok", "creation_date": "2008-04-14 00:00:00"}
\ No newline at end of file
--- a/test/test_main.py	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/test_main.py	Thu Jun 02 14:05:21 2016 -0700
@@ -19,7 +19,7 @@
 
     def test_ascii_with_schema_path_and_query(self):
         url = 'https://www.google.com/search?q=why+is+domain+whois+such+a+mess'
-        domain = 'google.com'
+        domain = 'www.google.com'
         self.assertEqual(domain, extract_domain(url))
 
     def test_simple_unicode_domain(self):
--- a/test/test_nicclient.py	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/test_nicclient.py	Thu Jun 02 14:05:21 2016 -0700
@@ -18,7 +18,6 @@
     def test_choose_server(self):
         domain = 'рнидс.срб'
         chosen = self.client.choose_server(domain)
-        self.assertEqual(
-            chosen,
-            'срб'.decode('utf-8').encode('idna') + '.whois-servers.net'
-        )
+        suffix = domain.split('.')[-1].encode('idna').decode('utf-8')
+        correct = '{}.whois-servers.net'.format(suffix)
+        self.assertEqual(chosen, correct)
--- a/test/test_parser.py	Thu Jun 02 13:59:46 2016 -0700
+++ b/test/test_parser.py	Thu Jun 02 14:05:21 2016 -0700
@@ -21,22 +21,22 @@
 class TestParser(unittest.TestCase):
     def test_com_expiration(self):
         data = """
-            Status: ok
-            Updated Date: 14-apr-2008
-            Creation Date: 14-apr-2008
-            Expiration Date: 14-apr-2009
-            
-            >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<<
+        Status: ok
+        Updated Date: 14-apr-2008
+        Creation Date: 14-apr-2008
+        Expiration Date: 14-apr-2009
+        
+        >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<<
         """
         w = WhoisEntry.load('urlowl.com', data)
         expires = w.expiration_date.strftime('%Y-%m-%d')
-        self.assertEquals(expires, '2009-04-14')
+        self.assertEqual(expires, '2009-04-14')
 
     def test_cast_date(self):
         dates = ['14-apr-2008', '2008-04-14']
         for d in dates:
             r = cast_date(d).strftime('%Y-%m-%d')
-            self.assertEquals(r, '2008-04-14')
+            self.assertEqual(r, '2008-04-14')
 
     def test_com_allsamples(self):
         """
@@ -46,41 +46,53 @@
         
         To generate fresh expected value dumps, see NOTE below.
         """
-        keys_to_test = ['domain_name', 'expiration_date', 'updated_date', 'creation_date', 'status']
+        keys_to_test = ['domain_name', 'expiration_date', 'updated_date',
+                        'creation_date', 'status']
         fail = 0
+        total = 0
         for path in glob('test/samples/whois/*.com'):
             # Parse whois data
             domain = os.path.basename(path)
-            whois_fp = open(path)
-            data = whois_fp.read()
+            with open(path) as whois_fp:
+                data = whois_fp.read()
             
             w = WhoisEntry.load(domain, data)
-            results = {}
-            for key in keys_to_test:
-                results[key] = w.get(key)
+            results = {key: w.get(key) for key in keys_to_test}
+
+            # NOTE: Toggle condition below to write expected results from the
+            # parse results. This will overwrite the existing expected results.
+            # Only do this if you've manually confirmed that the parser is
+            # generating correct values at its current state.
+            if False:
+                def date2str4json(obj):
+                    if isinstance(obj, datetime.datetime):
+                        return str(obj)
+                    raise TypeError(
+                            '{} is not JSON serializable'.format(repr(obj)))
+                outfile_name = os.path.join('test/samples/expected/', domain)
+                with open(outfile_name, 'w') as outfil:
+                    expected_results = simplejson.dump(results, outfil,
+                                                       default=date2str4json)
+                continue
 
             # Load expected result
-            expected_fp = open(os.path.join('test/samples/expected/', domain))
-            expected_results = simplejson.load(expected_fp)
-            
-            # NOTE: Toggle condition below to write expected results from the parse results
-            # This will overwrite the existing expected results. Only do this if you've manually
-            # confirmed that the parser is generating correct values at its current state.
-            if False:
-                expected_fp = open(os.path.join('test/samples/expected/', domain), 'w')
-                expected_results = simplejson.dump(results, expected_fp)
-                continue
+            with open(os.path.join('test/samples/expected/', domain)) as infil:
+                expected_results = simplejson.load(infil)
             
             # Compare each key
             for key in results:
+                total += 1
                 result = results.get(key)
+                if isinstance(result, datetime.datetime):
+                    result = str(result)
                 expected = expected_results.get(key)
                 if expected != result:
                     print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
                     fail += 1
             
         if fail:
-            self.fail("%d sample whois attributes were not parsed properly!" % fail)
+            self.fail("%d/%d sample whois attributes were not parsed properly!"
+                      % (fail, total))
 
 
 if __name__ == '__main__':
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_query.py	Thu Jun 02 14:05:21 2016 -0700
@@ -0,0 +1,25 @@
+# coding=utf-8
+
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future import standard_library
+standard_library.install_aliases()
+from builtins import *
+import unittest
+from whois import whois
+
+
+class TestQuery(unittest.TestCase):
+    def test_simple_ascii_domain(self):
+        domain = 'google.com'
+        whois(domain)
+
+    def test_simple_unicode_domain(self):
+        domain = 'нарояци.com'
+        whois(domain)
+
+    def test_unicode_domain_and_tld(self):
+        domain = 'россия.рф'
+        whois(domain)
--- a/whois/__init__.py	Thu Jun 02 13:59:46 2016 -0700
+++ b/whois/__init__.py	Thu Jun 02 14:05:21 2016 -0700
@@ -57,17 +57,18 @@
         return socket.gethostbyaddr(url)[0]
 
     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
-    suffixes = [
-        line.lower().strip().encode('utf-8')
-        for line in open(tlds_path).readlines()
-        if not line.startswith('#')
-    ]
+    with open(tlds_path) as tlds_fil:
+        suffixes = [line.lower().encode('utf-8')
+                    for line in (x.strip() for x in tlds_fil)
+                    if not line.startswith('#')]
+    suff = 'xn--p1ai'
 
     if not isinstance(url, str):
         url = url.decode('utf-8')
-    url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower()
+    url = re.sub('^.*://', '', url)
+    url = url.split('/')[0].lower().encode('idna')
+
     domain = []
-
     for section in url.split(b'.'):
         if section in suffixes:
             domain.append(section)
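
The rewritten extract_domain now keeps the URL as text until the final IDNA encode, and the updated test in test_main.py above expects the full host back from the Google search URL. A quick usage sketch (illustrative; assumes the usual import from the whois package):

    from whois import extract_domain
    extract_domain('https://www.google.com/search?q=why+is+domain+whois+such+a+mess')
    # -> 'www.google.com' (previously 'google.com')
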
--- a/whois/whois.py	Thu Jun 02 13:59:46 2016 -0700
+++ b/whois/whois.py	Thu Jun 02 14:05:21 2016 -0700
@@ -97,8 +97,12 @@
             s.connect((hostname, 43))
             # send takes bytes as an input
             queryBytes = None
-            if type(query) is not str:
+            try:
                 query = query.decode('utf-8')
+            except UnicodeEncodeError:
+                pass  # Already Unicode (python2's error)
+            except AttributeError:
+                pass  # Already Unicode (python3's error)
 
             if hostname == NICClient.DENICHOST:
                 queryBytes = "-T dn,ace -C UTF-8 " + query
@@ -132,14 +136,16 @@
 
     def choose_server(self, domain):
         """Choose initial lookup NIC host"""
-        if type(domain) is not str:
+        try:
+            domain = domain.encode('idna').decode('utf-8')
+        except TypeError:
             domain = domain.decode('utf-8').encode('idna').decode('utf-8')
         if domain.endswith("-NORID"):
             return NICClient.NORIDHOST
-        pos = domain.rfind('.')
-        if pos == -1:
+        domain = domain.split('.')
+        if len(domain) < 2:
             return None
-        tld = domain[pos+1:]
+        tld = domain[-1]
         if tld[0].isdigit():
             return NICClient.ANICHOST
         return tld + NICClient.QNICHOST_TAIL
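
With choose_server IDNA-encoding the whole domain up front, a fully Cyrillic name from the new test_query.py selects its whois server from the punycoded TLD. A short sketch of the expected result (illustrative; the import path and the '.whois-servers.net' value of QNICHOST_TAIL are inferred from the tests above):

    from whois.whois import NICClient

    client = NICClient()
    # 'рф' IDNA-encodes to 'xn--p1ai', so the last label picks the host:
    client.choose_server('россия.рф')    # -> 'xn--p1ai.whois-servers.net'
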