--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_main.py Fri Aug 15 17:11:52 2014 +0300
@@ -0,0 +1,26 @@
+# coding=utf-8
+
+import unittest
+from whois import extract_domain
+
+
+class TestExtractDomain(unittest.TestCase):
+    def test_simple_ascii_domain(self):
+        # A bare ASCII domain is expected to come back unchanged.
+        bare = 'google.com'
+        self.assertEqual(bare, extract_domain(bare))
+
+    def test_ascii_with_schema_path_and_query(self):
+        # Scheme, 'www.' prefix, path and query are expected to be dropped.
+        address = 'https://www.google.com/search?q=why+is+domain+whois+such+a+mess'
+        self.assertEqual('google.com', extract_domain(address))
+
+    def test_simple_unicode_domain(self):
+        # A non-ASCII label under an ASCII TLD is expected back un-punycoded.
+        address = 'http://нарояци.com/'
+        self.assertEqual('нарояци.com', extract_domain(address))
+
+    def test_unicode_domain_and_tld(self):
+        # Label and TLD are both non-ASCII; neither is expected as punycode.
+        address = 'http://россия.рф/'
+        self.assertEqual('россия.рф', extract_domain(address))
--- a/whois/__init__.py Fri Aug 15 13:31:24 2014 +0300
+++ b/whois/__init__.py Fri Aug 15 17:11:52 2014 +0300
@@ -58,12 +58,17 @@
url = re.sub('^.*://', '', url).split('/')[0].lower()
domain = []
- for section in url.split('.'):
+ url_sections = (
+ section.decode('utf-8').encode('idna')
+ for section in url.split('.')
+ )
+
+ for section in url_sections:
if section in suffixes:
domain.append(section)
else:
domain = [section]
- return '.'.join(domain)
+ return '.'.join(domain).decode('idna').encode('utf-8')
if __name__ == '__main__':