--- a/test/test_parser.py Thu Aug 31 22:10:29 2017 +0000
+++ b/test/test_parser.py Tue Oct 24 15:23:48 2017 +0300
@@ -13,7 +13,10 @@
import datetime
-import simplejson
+try:
+    import json
+except ImportError:  # stdlib json not importable; use simplejson instead
+    import simplejson as json
from glob import glob
from whois.parser import WhoisEntry, cast_date
@@ -25,7 +28,7 @@
Updated Date: 14-apr-2008
Creation Date: 14-apr-2008
Expiration Date: 14-apr-2009
-
+
>>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<<
"""
w = WhoisEntry.load('urlowl.com', data)
@@ -43,7 +46,7 @@
Iterate over all of the sample/whois/*.com files, read the data,
parse it, and compare to the expected values in sample/expected/.
Only keys defined in keys_to_test will be tested.
-
+
To generate fresh expected value dumps, see NOTE below.
"""
keys_to_test = ['domain_name', 'expiration_date', 'updated_date',
@@ -55,7 +58,7 @@
domain = os.path.basename(path)
with open(path) as whois_fp:
data = whois_fp.read()
-
+
w = WhoisEntry.load(domain, data)
results = {key: w.get(key) for key in keys_to_test}
@@ -71,14 +74,14 @@
'{} is not JSON serializable'.format(repr(obj)))
outfile_name = os.path.join('test/samples/expected/', domain)
with open(outfile_name, 'w') as outfil:
- expected_results = simplejson.dump(results, outfil,
+ expected_results = json.dump(results, outfil,
default=date2str4json)
continue
# Load expected result
with open(os.path.join('test/samples/expected/', domain)) as infil:
- expected_results = simplejson.load(infil)
-
+ expected_results = json.load(infil)
+
# Compare each key
for key in results:
total += 1
@@ -89,7 +92,7 @@
if expected != result:
print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
fail += 1
-
+
if fail:
self.fail("%d/%d sample whois attributes were not parsed properly!"
% (fail, total))
@@ -131,38 +134,108 @@
ns1.testserver1.net
ns2.testserver2.net
"""
- results = WhoisEntry.load('testcompany.ca', data)
expected_results = {
- "updated_date": "2016-04-29 00:00:00",
+ "updated_date": "2016-04-29 00:00:00",
"registrant_name": [
- "Webnames.ca Inc.",
- "Test Industries",
- "Test Person1",
+ "Webnames.ca Inc.",
+ "Test Industries",
+ "Test Person1",
"Test Persion2"
- ],
+ ],
"fax": [
- "+1.123434123",
+ "+1.123434123",
"+1.12312993873"
- ],
- "dnssec": "Unsigned",
- "registrant_number": "70",
- "expiration_date": "2020-03-08 00:00:00",
- "domain_name": "testdomain.ca",
- "creation_date": "2000-11-20 00:00:00",
+ ],
+ "dnssec": "Unsigned",
+ "registrant_number": "70",
+ "expiration_date": "2020-03-08 00:00:00",
+ "domain_name": "testdomain.ca",
+ "creation_date": "2000-11-20 00:00:00",
"phone": [
- "+1.1235434123x123",
+ "+1.1235434123x123",
"+1.09876545123"
- ],
- "domain_status": "registered",
+ ],
+ "domain_status": "registered",
"emails": [
- "testperson1@testcompany.ca",
+ "testperson1@testcompany.ca",
"testpersion2@testcompany.ca"
]
}
-
+ self._parse_and_compare('testcompany.ca', data, expected_results)
+
+ def test_il_parse(self):  # sample .il record; its e-mails use ' AT ' in place of '@'
+ data = """
+ query: python.org.il
+
+ reg-name: python
+ domain: python.org.il
+
+ descr: Arik Baratz
+ descr: PO Box 7775 PMB 8452
+ descr: San Francisco, CA
+ descr: 94120
+ descr: USA
+ phone: +1 650 6441973
+ e-mail: hostmaster AT arik.baratz.org
+ admin-c: LD-AB16063-IL
+ tech-c: LD-AB16063-IL
+ zone-c: LD-AB16063-IL
+ nserver: dns1.zoneedit.com
+ nserver: dns2.zoneedit.com
+ nserver: dns3.zoneedit.com
+ validity: 10-05-2018
+ DNSSEC: unsigned
+ status: Transfer Locked
+ changed: domain-registrar AT isoc.org.il 20050524 (Assigned)
+ changed: domain-registrar AT isoc.org.il 20070520 (Transferred)
+ changed: domain-registrar AT isoc.org.il 20070520 (Changed)
+ changed: domain-registrar AT isoc.org.il 20070520 (Changed)
+ changed: domain-registrar AT isoc.org.il 20070807 (Changed)
+ changed: domain-registrar AT isoc.org.il 20071025 (Changed)
+ changed: domain-registrar AT isoc.org.il 20071025 (Changed)
+ changed: domain-registrar AT isoc.org.il 20081221 (Changed)
+ changed: domain-registrar AT isoc.org.il 20081221 (Changed)
+ changed: domain-registrar AT isoc.org.il 20160301 (Changed)
+ changed: domain-registrar AT isoc.org.il 20160301 (Changed)
+
+ person: Arik Baratz
+ address: PO Box 7775 PMB 8452
+ address: San Francisco, CA
+ address: 94120
+ address: USA
+ phone: +1 650 9635533
+ e-mail: hostmaster AT arik.baratz.org
+ nic-hdl: LD-AB16063-IL
+ changed: Managing Registrar 20070514
+ changed: Managing Registrar 20081002
+ changed: Managing Registrar 20081221
+ changed: Managing Registrar 20081221
+ changed: Managing Registrar 20090502
+
+ registrar name: LiveDns Ltd
+ registrar info: http://domains.livedns.co.il
+ """
+ expected_results = {
+ "updated_date": None,
+ "registrant_name": "Arik Baratz",
+ "fax": None,
+ "dnssec": "unsigned",
+ "expiration_date": "2018-05-10 00:00:00",  # from 'validity: 10-05-2018' (day-first)
+ "domain_name": "python.org.il",
+ "creation_date": None,
+ "phone": ['+1 650 6441973', '+1 650 9635533'],
+ "status": "Transfer Locked",
+ "emails": "hostmaster@arik.baratz.org",  # sample text reads 'hostmaster AT arik.baratz.org'
+ "name_servers": ["dns1.zoneedit.com", "dns2.zoneedit.com", "dns3.zoneedit.com"],
+ "registrar": "LiveDns Ltd",
+ "referral_url": "http://domains.livedns.co.il"
+ }
+ self._parse_and_compare('python.org.il', data, expected_results)
+
+ def _parse_and_compare(self, domain_name, data, expected_results):
+ results = WhoisEntry.load(domain_name, data)
fail = 0
total = 0
-
# Compare each key
for key in expected_results:
total += 1
@@ -171,7 +244,7 @@
result = str(result)
expected = expected_results.get(key)
if expected != result:
- print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
+ print("%s \t(%s):\t %s != %s" % (domain_name, key, result, expected))
fail += 1
if fail:
self.fail("%d/%d sample whois attributes were not parsed properly!"
@@ -179,7 +252,5 @@
-
-
if __name__ == '__main__':
unittest.main()
--- a/whois/parser.py Thu Aug 31 22:10:29 2017 +0000
+++ b/whois/parser.py Tue Oct 24 15:23:48 2017 +0300
@@ -130,13 +130,7 @@
for data in re.findall(regex, self.text, re.IGNORECASE):
matches = data if isinstance(data, tuple) else [data]
for value in matches:
- value = value.strip()
- if value and isinstance(value, basestring) and not value.isdigit() and '_date' in attr:
- # try casting to date format
- value = cast_date(
- value,
- dayfirst=self.dayfirst,
- yearfirst=self.yearfirst)
+ value = self._preprocess(attr, value)
if value and value not in values:
# avoid duplicates
values.append(value)
@@ -149,6 +143,15 @@
self[attr] = values
+ def _preprocess(self, attr, value):
+     """Strip *value* and, for '_date' attributes, try casting it to a date."""
+     value = value.strip()
+     if not value or not isinstance(value, basestring):
+         return value
+     if value.isdigit() or '_date' not in attr:
+         return value
+     return cast_date(
+         value, dayfirst=self.dayfirst, yearfirst=self.yearfirst)
def __setitem__(self, name, value):
super(WhoisEntry, self).__setitem__(name, value)
@@ -258,6 +261,8 @@
return WhoisIt(domain, text)
elif domain.endswith('.ai'):
return WhoisAi(domain, text)
+ elif domain.endswith('.il'):
+ return WhoisIl(domain, text)
else:
return WhoisEntry(domain, text)
@@ -1239,3 +1244,33 @@
raise PywhoisError(text)
else:
WhoisEntry.__init__(self, domain, text, self.regex)
+
+class WhoisIl(WhoisEntry):
+    """Whois parser for .il domains."""
+    regex = {
+        'domain_name': 'domain: *(.+)',
+        'expiration_date': 'validity: *(.+)',
+        'registrant_name': 'person: *(.+)',
+        'dnssec': 'DNSSEC: *(.+)',
+        'status': 'status: *(.+)',
+        'name_servers': 'nserver: *(.+)',
+        'emails': 'e-mail: *(.+)',
+        'phone': 'phone: *(.+)',
+        'registrar': 'registrar name: *(.+)',
+        'referral_url': 'registrar info: *(.+)',
+    }
+
+    # 'validity' dates are written day-first (DD-MM-YYYY)
+    dayfirst = True
+
+    def __init__(self, domain, text):
+        """Parse *text*; raise PywhoisError if it says 'No data was found'."""
+        if 'No data was found' in text:
+            raise PywhoisError(text)
+        WhoisEntry.__init__(self, domain, text, self.regex)
+
+    def _preprocess(self, attr, value):
+        """Turn ' AT ' into '@' in e-mails, then apply the base handling."""
+        if attr == 'emails':
+            value = value.replace(' AT ', '@')
+        return super(WhoisIl, self)._preprocess(attr, value)