# HG changeset patch # User Richard Baron Penman # Date 1357629798 -39600 # Node ID c57439b500cb3ffd6beb4ac8914552c69f4df2a9 # Parent 5083c26d8f93d719d0011089690687c862997ddb fixed test cases diff -r 5083c26d8f93 -r c57439b500cb pywhois/__init__.py --- a/pywhois/__init__.py Tue Jan 08 16:43:03 2013 +1100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -import re -import sys -import subprocess -import socket -from parser import WhoisEntry -from whois import NICClient - - -def whois(url): - # clean domain to expose netloc - domain = extract_domain(url) - try: - raise OSError() - # try native whois command first - r = subprocess.Popen(['whois', domain], stdout=subprocess.PIPE) - text = r.stdout.read() - except OSError: - # try experimental client - nic_client = NICClient() - text = nic_client.whois_lookup(None, domain, 0) - print text - return WhoisEntry.load(domain, text) - - -def extract_domain(url): - """Extract the domain from the given URL - - >>> extract_domain('http://www.google.com.au/tos.html') - 'google.com.au' - >>> extract_domain('http://blog.webscraping.com') - 'webscraping.com' - >>> extract_domain('69.59.196.211') - 'stackoverflow.com' - """ - if re.match(r'\d+.\d+.\d+.\d+', url): - # this is an IP address - return socket.gethostbyaddr(url)[0] - - suffixes = 'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi', 'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec', 'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is', 'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo', 'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy', 'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn', 'ye', 'yt', 'za', 'zm', 'zw' - url = re.sub('^.*://', '', url).split('/')[0].lower() - domain = [] - for section in url.split('.'): - if section in suffixes: - domain.append(section) - else: - domain = [section] - return '.'.join(domain) - - -if __name__ == '__main__': - try: - url = sys.argv[1] - except IndexError: - print 'Usage: %s url' % sys.argv[0] - else: - print whois(url) diff -r 5083c26d8f93 -r c57439b500cb pywhois/parser.py --- a/pywhois/parser.py Tue Jan 08 16:43:03 2013 +1100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,451 +0,0 @@ -# parser.py - Module for parsing whois response data -# Copyright (c) 2008 Andrey Petrov -# -# This module is part of pywhois and is released under -# the MIT license: http://www.opensource.org/licenses/mit-license.php - -import re -from datetime import datetime - - -class PywhoisError(Exception): - pass - - -def cast_date(s): - """Convert any date string found in WHOIS to a datetime object. - """ - known_formats = [ - '%d-%b-%Y', # 02-jan-2000 - '%Y-%m-%d', # 2000-01-02 - '%d.%m.%Y', # 2000-01-02 - '%Y.%m.%d', # 2000.01.02 - '%Y/%m/%d', # 2000/01/02 - '%d-%b-%Y %H:%M:%S %Z', # 24-Jul-2009 13:20:03 UTC - '%a %b %d %H:%M:%S %Z %Y', # Tue Jun 21 23:59:59 GMT 2011 - '%Y-%m-%dT%H:%M:%SZ', # 2007-01-26T19:10:31Z - ] - - for known_format in known_formats: - try: - return datetime.strptime(s.strip(), known_format) - except ValueError, e: - pass # Wrong format, keep trying - return s - - -class WhoisEntry(object): - """Base class for parsing a Whois entries. - """ - # regular expressions to extract domain data from whois profile - # child classes will override this - _regex = { - 'domain_name': 'Domain Name:\s?(.+)', - 'registrar': 'Registrar:\s?(.+)', - 'whois_server': 'Whois Server:\s?(.+)', - 'referral_url': 'Referral URL:\s?(.+)', # http url of whois_server - 'updated_date': 'Updated Date:\s?(.+)', - 'creation_date': 'Creation Date:\s?(.+)', - 'expiration_date': 'Expiration Date:\s?(.+)', - 'name_servers': 'Name Server:\s?(.+)', # list of name servers - 'status': 'Status:\s?(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses - } - - def __init__(self, domain, text, regex=None): - self.domain = domain - self.text = text - if regex is not None: - self._regex = regex - - - def __getattr__(self, attr): - """The first time an attribute is called it will be calculated here. - The attribute is then set to be accessed directly by subsequent calls. - """ - whois_regex = self._regex.get(attr) - if whois_regex: - values = re.findall(whois_regex, self.text) - # try casting to date format - values = [cast_date(value.strip()) for value in values] - if len(values) == 1: - values = values[0] - setattr(self, attr, values) - return getattr(self, attr) - else: - raise KeyError('Unknown attribute: %s' % attr) - - def __str__(self): - """Print all whois properties of domain - """ - return '\n'.join('%s: %s' % (attr, str(getattr(self, attr))) for attr in self.attrs()) - - - def attrs(self): - """Return list of attributes that can be extracted for this domain - """ - return sorted(self._regex.keys()) - - - @staticmethod - def load(domain, text): - """Given whois output in ``text``, return an instance of ``WhoisEntry`` that represents its parsed contents. - """ - if text.strip() == 'No whois server is known for this kind of object.': - raise PywhoisError(text) - - if domain.endswith('.com'): - return WhoisCom(domain, text) - elif domain.endswith('.net'): - return WhoisNet(domain, text) - elif domain.endswith('.org'): - return WhoisOrg(domain, text) - elif domain.endswith('.name'): - return WhoisName(domain, text) - elif domain.endswith('.me'): - return WhoisMe(domain, text) - elif domain.endswith('.ru'): - return WhoisRu(domain, text) - elif domain.endswith('.us'): - return WhoisUs(domain, text) - elif domain.endswith('.uk'): - return WhoisUk(domain, text) - elif domain.endswith('.fr'): - return WhoisFr(domain, text) - elif domain.endswith('.fi'): - return WhoisFi(domain, text) - elif domain.endswith('.jp'): - return WhoisJp(domain, text) - elif domain.endswith('.pl'): - return WhoisPl(domain, text) - else: - return WhoisEntry(domain, text) - - - -class WhoisCom(WhoisEntry): - """Whois parser for .com domains - """ - def __init__(self, domain, text): - if 'No match for "' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text) - - -class WhoisNet(WhoisEntry): - """Whois parser for .net domains - """ - def __init__(self, domain, text): - if 'No match for "' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text) - - -class WhoisOrg(WhoisEntry): - """Whois parser for .org domains - """ - def __init__(self, domain, text): - if text.strip() == 'NOT FOUND': - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text) - - -class WhoisRu(WhoisEntry): - """Whois parser for .ru domains - """ - regex = { - 'domain_name': 'domain:\s*(.+)', - 'registrar': 'registrar:\s*(.+)', - 'creation_date': 'created:\s*(.+)', - 'expiration_date': 'paid-till:\s*(.+)', - 'name_servers': 'nserver:\s*(.+)', # list of name servers - 'status': 'state:\s*(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses - } - - def __init__(self, domain, text): - if text.strip() == 'No entries found': - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisName(WhoisEntry): - """Whois parser for .name domains - """ - regex = { - 'domain_name_id': 'Domain Name ID:\s*(.+)', - 'domain_name': 'Domain Name:\s*(.+)', - 'registrar_id': 'Sponsoring Registrar ID:\s*(.+)', - 'registrar': 'Sponsoring Registrar:\s*(.+)', - 'registrant_id': 'Registrant ID:\s*(.+)', - 'admin_id': 'Admin ID:\s*(.+)', - 'technical_id': 'Tech ID:\s*(.+)', - 'billing_id': 'Billing ID:\s*(.+)', - 'creation_date': 'Created On:\s*(.+)', - 'expiration_date': 'Expires On:\s*(.+)', - 'updated_date': 'Updated On:\s*(.+)', - 'name_server_ids': 'Name Server ID:\s*(.+)', # list of name server ids - 'name_servers': 'Name Server:\s*(.+)', # list of name servers - 'status': 'Domain Status:\s*(.+)', # list of statuses - } - def __init__(self, domain, text): - if 'No match.' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisUs(WhoisEntry): - """Whois parser for .us domains - """ - regex = { - 'domain_name': 'Domain Name:\s*(.+)', - 'domain__id': 'Domain ID:\s*(.+)', - 'registrar': 'Sponsoring Registrar:\s*(.+)', - 'registrar_id': 'Sponsoring Registrar IANA ID:\s*(.+)', - 'registrar_url': 'Registrar URL \(registration services\):\s*(.+)', - 'status': 'Domain Status:\s*(.+)', # list of statuses - 'registrant_id': 'Registrant ID:\s*(.+)', - 'registrant_name': 'Registrant Name:\s*(.+)', - 'registrant_address1': 'Registrant Address1:\s*(.+)', - 'registrant_address2': 'Registrant Address2:\s*(.+)', - 'registrant_city': 'Registrant City:\s*(.+)', - 'registrant_state_province': 'Registrant State/Province:\s*(.+)', - 'registrant_postal_code': 'Registrant Postal Code:\s*(.+)', - 'registrant_country': 'Registrant Country:\s*(.+)', - 'registrant_country_code': 'Registrant Country Code:\s*(.+)', - 'registrant_phone_number': 'Registrant Phone Number:\s*(.+)', - 'registrant_email': 'Registrant Email:\s*(.+)', - 'registrant_application_purpose': 'Registrant Application Purpose:\s*(.+)', - 'registrant_nexus_category': 'Registrant Nexus Category:\s*(.+)', - 'admin_id': 'Administrative Contact ID:\s*(.+)', - 'admin_name': 'Administrative Contact Name:\s*(.+)', - 'admin_address1': 'Administrative Contact Address1:\s*(.+)', - 'admin_address2': 'Administrative Contact Address2:\s*(.+)', - 'admin_city': 'Administrative Contact City:\s*(.+)', - 'admin_state_province': 'Administrative Contact State/Province:\s*(.+)', - 'admin_postal_code': 'Administrative Contact Postal Code:\s*(.+)', - 'admin_country': 'Administrative Contact Country:\s*(.+)', - 'admin_country_code': 'Administrative Contact Country Code:\s*(.+)', - 'admin_phone_number': 'Administrative Contact Phone Number:\s*(.+)', - 'admin_email': 'Administrative Contact Email:\s*(.+)', - 'admin_application_purpose': 'Administrative Application Purpose:\s*(.+)', - 'admin_nexus_category': 'Administrative Nexus Category:\s*(.+)', - 'billing_id': 'Billing Contact ID:\s*(.+)', - 'billing_name': 'Billing Contact Name:\s*(.+)', - 'billing_address1': 'Billing Contact Address1:\s*(.+)', - 'billing_address2': 'Billing Contact Address2:\s*(.+)', - 'billing_city': 'Billing Contact City:\s*(.+)', - 'billing_state_province': 'Billing Contact State/Province:\s*(.+)', - 'billing_postal_code': 'Billing Contact Postal Code:\s*(.+)', - 'billing_country': 'Billing Contact Country:\s*(.+)', - 'billing_country_code': 'Billing Contact Country Code:\s*(.+)', - 'billing_phone_number': 'Billing Contact Phone Number:\s*(.+)', - 'billing_email': 'Billing Contact Email:\s*(.+)', - 'billing_application_purpose': 'Billing Application Purpose:\s*(.+)', - 'billing_nexus_category': 'Billing Nexus Category:\s*(.+)', - 'tech_id': 'Technical Contact ID:\s*(.+)', - 'tech_name': 'Technical Contact Name:\s*(.+)', - 'tech_address1': 'Technical Contact Address1:\s*(.+)', - 'tech_address2': 'Technical Contact Address2:\s*(.+)', - 'tech_city': 'Technical Contact City:\s*(.+)', - 'tech_state_province': 'Technical Contact State/Province:\s*(.+)', - 'tech_postal_code': 'Technical Contact Postal Code:\s*(.+)', - 'tech_country': 'Technical Contact Country:\s*(.+)', - 'tech_country_code': 'Technical Contact Country Code:\s*(.+)', - 'tech_phone_number': 'Technical Contact Phone Number:\s*(.+)', - 'tech_email': 'Technical Contact Email:\s*(.+)', - 'tech_application_purpose': 'Technical Application Purpose:\s*(.+)', - 'tech_nexus_category': 'Technical Nexus Category:\s*(.+)', - 'name_servers': 'Name Server:\s*(.+)', # list of name servers - 'created_by_registrar': 'Created by Registrar:\s*(.+)', - 'last_updated_by_registrar': 'Last Updated by Registrar:\s*(.+)', - 'creation_date': 'Domain Registration Date:\s*(.+)', - 'expiration_date': 'Domain Expiration Date:\s*(.+)', - 'updated_date': 'Domain Last Updated Date:\s*(.+)', - } - def __init__(self, domain, text): - if 'Not found:' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisPl(WhoisEntry): - """Whois parser for .uk domains - """ - regex = { - 'domain_name': 'DOMAIN NAME:\s*(.+)\n', - 'registrar': 'REGISTRAR:\n\s*(.+)', - 'registrar_url': 'URL:\s*(.+)', # not available - 'status': 'Registration status:\n\s*(.+)', # not available - 'registrant_name': 'Registrant:\n\s*(.+)', # not available - 'creation_date': 'created:\s*(.+)\n', - 'expiration_date': 'renewal date:\s*(.+)', - 'updated_date': 'last modified:\s*(.+)\n', - } - def __init__(self, domain, text): - if 'Not found:' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisMe(WhoisEntry): - """Whois parser for .me domains - """ - regex = { - 'domain_id': 'Domain ID:(.+)', - 'domain_name': 'Domain Name:(.+)', - 'creation_date': 'Domain Create Date:(.+)', - 'updated_date': 'Domain Last Updated Date:(.+)', - 'expiration_date': 'Domain Expiration Date:(.+)', - 'transfer_date': 'Last Transferred Date:(.+)', - 'trademark_name': 'Trademark Name:(.+)', - 'trademark_country': 'Trademark Country:(.+)', - 'trademark_number': 'Trademark Number:(.+)', - 'trademark_application_date': 'Date Trademark Applied For:(.+)', - 'trademark_registration_date': 'Date Trademark Registered:(.+)', - 'registrar': 'Sponsoring Registrar:(.+)', - 'created_by': 'Created by:(.+)', - 'updated_by': 'Last Updated by Registrar:(.+)', - 'status': 'Domain Status:(.+)', # list of statuses - 'registrant_id': 'Registrant ID:(.+)', - 'registrant_name': 'Registrant Name:(.+)', - 'registrant_org': 'Registrant Organization:(.+)', - 'registrant_address': 'Registrant Address:(.+)', - 'registrant_address2': 'Registrant Address2:(.+)', - 'registrant_address3': 'Registrant Address3:(.+)', - 'registrant_city': 'Registrant City:(.+)', - 'registrant_state_province': 'Registrant State/Province:(.+)', - 'registrant_country': 'Registrant Country/Economy:(.+)', - 'registrant_postal_code': 'Registrant Postal Code:(.+)', - 'registrant_phone': 'Registrant Phone:(.+)', - 'registrant_phone_ext': 'Registrant Phone Ext\.:(.+)', - 'registrant_fax': 'Registrant FAX:(.+)', - 'registrant_fax_ext': 'Registrant FAX Ext\.:(.+)', - 'registrant_email': 'Registrant E-mail:(.+)', - 'admin_id': 'Admin ID:(.+)', - 'admin_name': 'Admin Name:(.+)', - 'admin_org': 'Admin Organization:(.+)', - 'admin_address': 'Admin Address:(.+)', - 'admin_address2': 'Admin Address2:(.+)', - 'admin_address3': 'Admin Address3:(.+)', - 'admin_city': 'Admin City:(.+)', - 'admin_state_province': 'Admin State/Province:(.+)', - 'admin_country': 'Admin Country/Economy:(.+)', - 'admin_postal_code': 'Admin Postal Code:(.+)', - 'admin_phone': 'Admin Phone:(.+)', - 'admin_phone_ext': 'Admin Phone Ext\.:(.+)', - 'admin_fax': 'Admin FAX:(.+)', - 'admin_fax_ext': 'Admin FAX Ext\.:(.+)', - 'admin_email': 'Admin E-mail:(.+)', - 'tech_id': 'Tech ID:(.+)', - 'tech_name': 'Tech Name:(.+)', - 'tech_org': 'Tech Organization:(.+)', - 'tech_address': 'Tech Address:(.+)', - 'tech_address2': 'Tech Address2:(.+)', - 'tech_address3': 'Tech Address3:(.+)', - 'tech_city': 'Tech City:(.+)', - 'tech_state_province': 'Tech State/Province:(.+)', - 'tech_country': 'Tech Country/Economy:(.+)', - 'tech_postal_code': 'Tech Postal Code:(.+)', - 'tech_phone': 'Tech Phone:(.+)', - 'tech_phone_ext': 'Tech Phone Ext\.:(.+)', - 'tech_fax': 'Tech FAX:(.+)', - 'tech_fax_ext': 'Tech FAX Ext\.:(.+)', - 'tech_email': 'Tech E-mail:(.+)', - 'name_servers': 'Nameservers:(.+)', # list of name servers - } - def __init__(self, domain, text): - if 'NOT FOUND' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisUk(WhoisEntry): - """Whois parser for .uk domains - """ - regex = { - 'domain_name': 'Domain name:\n\s*(.+)', - 'registrar': 'Registrar:\n\s*(.+)', - 'registrar_url': 'URL:\s*(.+)', - 'status': 'Registration status:\n\s*(.+)', # list of statuses - 'registrant_name': 'Registrant:\n\s*(.+)', - 'creation_date': 'Registered on:\s*(.+)', - 'expiration_date': 'Expiry date:\s*(.+)', - 'updated_date': 'Last updated:\s*(.+)', - 'name_servers': 'Name servers:\s*(.+)', - } - def __init__(self, domain, text): - if 'Not found:' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisFr(WhoisEntry): - """Whois parser for .fr domains - """ - regex = { - 'domain_name': 'domain:\s*(.+)', - 'registrar': 'registrar:\s*(.+)', - 'creation_date': 'created:\s*(.+)', - 'expiration_date': 'anniversary:\s*(.+)', - 'name_servers': 'nserver:\s*(.+)', # list of name servers - 'status': 'status:\s*(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses - 'updated_date': 'last-update:\s*(.+)', - } - - def __init__(self, domain, text): - if text.strip() == 'No entries found': - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisFi(WhoisEntry): - """Whois parser for .fi domains - """ - regex = { - 'domain_name': 'domain:\s*([\S]+)', - 'registrant_name': 'descr:\s*([\S\ ]+)', - 'registrant_address': 'address:\s*([\S\ ]+)', - 'registrant_phone': 'phone:\s*([\S\ ]+)', - 'status': 'status:\s*([\S]+)', # list of statuses - 'creation_date': 'created:\s*([\S]+)', - 'updated_date': 'modified:\s*([\S]+)', - 'expiration_date': 'expires:\s*([\S]+)', - 'name_servers': 'nserver:\s*([\S]+) \[(\S+)\]', # list of name servers - 'dnssec': 'dnssec:\s*([\S]+)', # list of name servers - } - def __init__(self, domain, text): - if 'Not found:' in text: - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) - - -class WhoisJp(WhoisEntry): - """Whois parser for .jp domains - """ - regex = { - 'domain_name': 'a\. \[Domain Name\]\s*(.+)', - 'registrant_org': 'g\. \[Organization\](.+)', - 'creation_date': r'\[Registered Date\]\s*(.+)', - 'name_servers': 'p\. \[Name Server\]\s*(.+)', # list of name servers - 'updated_date': '\[Last Update\]\s?(.+)', - 'status': '\[State\]\s*(.+)', # list of statuses - } - - def __init__(self, domain, text): - if text.strip() == 'No entries found': - raise PywhoisError(text) - else: - WhoisEntry.__init__(self, domain, text, self.regex) diff -r 5083c26d8f93 -r c57439b500cb pywhois/whois.py --- a/pywhois/whois.py Tue Jan 08 16:43:03 2013 +1100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,275 +0,0 @@ -""" -Whois client for python - -transliteration of: -http://www.opensource.apple.com/source/adv_cmds/adv_cmds-138.1/whois/whois.c - -Copyright (c) 2010 Chris Wolf - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - - Last edited by: $Author$ - on: $DateTime$ - Revision: $Revision$ - Id: $Id$ - Author: Chris Wolf -""" -import sys -import socket -import optparse -#import pdb - -def enforce_ascii(a): - if isinstance(a, str) or isinstance(a, unicode): - # return a.encode('ascii', 'replace') - r = "" - for i in a: - if ord(i) >= 128: - r += "?" - else: - r += i - return r - else: - return a - -class NICClient(object) : - - ABUSEHOST = "whois.abuse.net" - NICHOST = "whois.crsnic.net" - INICHOST = "whois.networksolutions.com" - DNICHOST = "whois.nic.mil" - GNICHOST = "whois.nic.gov" - ANICHOST = "whois.arin.net" - LNICHOST = "whois.lacnic.net" - RNICHOST = "whois.ripe.net" - PNICHOST = "whois.apnic.net" - MNICHOST = "whois.ra.net" - QNICHOST_TAIL = ".whois-servers.net" - SNICHOST = "whois.6bone.net" - BNICHOST = "whois.registro.br" - NORIDHOST = "whois.norid.no" - IANAHOST = "whois.iana.org" - DENICHOST = "de.whois-servers.net" - DEFAULT_PORT = "nicname" - WHOIS_SERVER_ID = "Whois Server:" - WHOIS_ORG_SERVER_ID = "Registrant Street1:Whois Server:" - - - WHOIS_RECURSE = 0x01 - WHOIS_QUICK = 0x02 - - ip_whois = [ LNICHOST, RNICHOST, PNICHOST, BNICHOST ] - - def __init__(self) : - self.use_qnichost = False - - def findwhois_server(self, buf, hostname): - """Search the initial TLD lookup results for the regional-specifc - whois server for getting contact details. - """ - #print 'finding whois server' - #print 'parameters:', buf, 'hostname', hostname - nhost = None - parts_index = 1 - start = buf.find(NICClient.WHOIS_SERVER_ID) - #print 'start', start - if (start == -1): - start = buf.find(NICClient.WHOIS_ORG_SERVER_ID) - parts_index = 2 - - if (start > -1): - end = buf[start:].find('\n') - #print 'end:', end - whois_line = buf[start:end+start] - #print 'whois_line', whois_line - nhost = whois_line.split(NICClient.WHOIS_SERVER_ID+' ').pop() - nhost = nhost.split('http://').pop() - #if the whois address is domain.tld/something then - #s.connect((hostname, 43)) does not work - if nhost.count('/') > 0: - nhost = None - #print 'nhost:',nhost - elif (hostname == NICClient.ANICHOST): - for nichost in NICClient.ip_whois: - if (buf.find(nichost) != -1): - nhost = nichost - break - return nhost - - def whois(self, query, hostname, flags): - """Perform initial lookup with TLD whois server - then, if the quick flag is false, search that result - for the region-specifc whois server and do a lookup - there for contact details - """ - #print 'Performing the whois' - #print 'parameters given:', query, hostname, flags - #pdb.set_trace() - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.connect((hostname, 43)) - """send takes bytes as an input - """ - queryBytes = None - if (hostname == NICClient.DENICHOST): - #print 'the domain is in NIC DENIC' - queryBytes = ("-T dn,ace -C UTF-8 " + query + "\r\n").encode() - #print 'queryBytes:', queryBytes - else: - queryBytes = (query + "\r\n").encode() - s.send(queryBytes) - """recv returns bytes - """ - #print s - response = b'' - while True: - d = s.recv(4096) - response += d - if not d: - break - s.close() - #pdb.set_trace() - nhost = None - #print 'response', response - response = enforce_ascii(response) - if (flags & NICClient.WHOIS_RECURSE and nhost == None): - #print 'Inside first if' - nhost = self.findwhois_server(response.decode(), hostname) - #print 'nhost is:', nhost - if (nhost != None): - #print 'inside second if' - response += self.whois(query, nhost, 0) - #print 'response', response - #print 'returning whois response' - return response.decode() - - def choose_server(self, domain): - """Choose initial lookup NIC host""" - if (domain.endswith("-NORID")): - return NICClient.NORIDHOST - pos = domain.rfind('.') - if (pos == -1): - return None - tld = domain[pos+1:] - if (tld[0].isdigit()): - return NICClient.ANICHOST - - return tld + NICClient.QNICHOST_TAIL - - def whois_lookup(self, options, query_arg, flags): - """Main entry point: Perform initial lookup on TLD whois server, - or other server to get region-specific whois server, then if quick - flag is false, perform a second lookup on the region-specific - server for contact records""" - #print 'whois_lookup' - nichost = None - #pdb.set_trace() - # this would be the case when this function is called by other than main - if (options == None): - options = {} - - if ( (not 'whoishost' in options or options['whoishost'] == None) - and (not 'country' in options or options['country'] == None)): - self.use_qnichost = True - options['whoishost'] = NICClient.NICHOST - if ( not (flags & NICClient.WHOIS_QUICK)): - flags |= NICClient.WHOIS_RECURSE - - if ('country' in options and options['country'] != None): - result = self.whois(query_arg, options['country'] + NICClient.QNICHOST_TAIL, flags) - elif (self.use_qnichost): - nichost = self.choose_server(query_arg) - if (nichost != None): - result = self.whois(query_arg, nichost, flags) - else: - result = self.whois(query_arg, options['whoishost'], flags) - #print 'whois_lookup finished' - return result -#---- END OF NICClient class def --------------------- - -def parse_command_line(argv): - """Options handling mostly follows the UNIX whois(1) man page, except - long-form options can also be used. - """ - flags = 0 - - usage = "usage: %prog [options] name" - - parser = optparse.OptionParser(add_help_option=False, usage=usage) - parser.add_option("-a", "--arin", action="store_const", - const=NICClient.ANICHOST, dest="whoishost", - help="Lookup using host " + NICClient.ANICHOST) - parser.add_option("-A", "--apnic", action="store_const", - const=NICClient.PNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.PNICHOST) - parser.add_option("-b", "--abuse", action="store_const", - const=NICClient.ABUSEHOST, dest="whoishost", - help="Lookup using host " + NICClient.ABUSEHOST) - parser.add_option("-c", "--country", action="store", - type="string", dest="country", - help="Lookup using country-specific NIC") - parser.add_option("-d", "--mil", action="store_const", - const=NICClient.DNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.DNICHOST) - parser.add_option("-g", "--gov", action="store_const", - const=NICClient.GNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.GNICHOST) - parser.add_option("-h", "--host", action="store", - type="string", dest="whoishost", - help="Lookup using specified whois host") - parser.add_option("-i", "--nws", action="store_const", - const=NICClient.INICHOST, dest="whoishost", - help="Lookup using host " + NICClient.INICHOST) - parser.add_option("-I", "--iana", action="store_const", - const=NICClient.IANAHOST, dest="whoishost", - help="Lookup using host " + NICClient.IANAHOST) - parser.add_option("-l", "--lcanic", action="store_const", - const=NICClient.LNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.LNICHOST) - parser.add_option("-m", "--ra", action="store_const", - const=NICClient.MNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.MNICHOST) - parser.add_option("-p", "--port", action="store", - type="int", dest="port", - help="Lookup using specified tcp port") - parser.add_option("-Q", "--quick", action="store_true", - dest="b_quicklookup", - help="Perform quick lookup") - parser.add_option("-r", "--ripe", action="store_const", - const=NICClient.RNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.RNICHOST) - parser.add_option("-R", "--ru", action="store_const", - const="ru", dest="country", - help="Lookup Russian NIC") - parser.add_option("-6", "--6bone", action="store_const", - const=NICClient.SNICHOST, dest="whoishost", - help="Lookup using host " + NICClient.SNICHOST) - parser.add_option("-?", "--help", action="help") - - - return parser.parse_args(argv) - -if __name__ == "__main__": - flags = 0 - nic_client = NICClient() - (options, args) = parse_command_line(sys.argv) - if (options.b_quicklookup is True): - flags = flags|NICClient.WHOIS_QUICK - print(nic_client.whois_lookup(options.__dict__, args[1], flags)) - - diff -r 5083c26d8f93 -r c57439b500cb setup.py --- a/setup.py Tue Jan 08 16:43:03 2013 +1100 +++ b/setup.py Tue Jan 08 18:23:18 2013 +1100 @@ -1,10 +1,10 @@ from setuptools import setup, find_packages import sys, os -version = '0.1' +version = '0.2' setup( - name='whois', + name='python-whois', version=version, description="Whois querying and parsing of domain registration information.", long_description='', @@ -16,13 +16,13 @@ 'Programming Language :: Python', 'Topic :: Internet :: WWW/HTTP' ], - keywords='whois', + keywords='whois, python', author='Richard Penman', author_email='richard@webscraping.com', url='http://code.google.com/p/pywhois/', license='MIT', - packages=['pywhois'], - package_dir={'pywhois':'pywhois'}, + packages=['whois'], + package_dir={'whois':'whois'}, include_package_data=True, zip_safe=False, ) diff -r 5083c26d8f93 -r c57439b500cb test/test_parser.py --- a/test/test_parser.py Tue Jan 08 16:43:03 2013 +1100 +++ b/test/test_parser.py Tue Jan 08 18:23:18 2013 +1100 @@ -4,12 +4,12 @@ import sys sys.path.append('../') -import time +import datetime import simplejson from glob import glob -from pywhois.parser import WhoisEntry, cast_date +from whois.parser import WhoisEntry, cast_date class TestParser(unittest.TestCase): def test_com_expiration(self): @@ -22,13 +22,13 @@ >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<< """ w = WhoisEntry.load('urlowl.com', data) - expires = w.get('expiration_date') - self.assertEquals(expires, ['14-apr-2009']) + expires = w.expiration_date.strftime('%Y-%m-%d') + self.assertEquals(expires, '2009-04-14') def test_cast_date(self): dates = ['14-apr-2008', '2008-04-14'] for d in dates: - r = time.strftime('%Y-%m-%d', cast_date(d)) + r = cast_date(d).strftime('%Y-%m-%d') self.assertEquals(r, '2008-04-14') def test_com_allsamples(self): @@ -73,4 +73,8 @@ fail += 1 if fail: - self.fail("%d sample whois attributes were not parsed properly!" % fail) \ No newline at end of file + self.fail("%d sample whois attributes were not parsed properly!" % fail) + + +if __name__ == '__main__': + unittest.main() diff -r 5083c26d8f93 -r c57439b500cb whois/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/whois/__init__.py Tue Jan 08 18:23:18 2013 +1100 @@ -0,0 +1,55 @@ +import re +import sys +import subprocess +import socket +from parser import WhoisEntry +from whois import NICClient + + +def whois(url): + # clean domain to expose netloc + domain = extract_domain(url) + try: + raise OSError() + # try native whois command first + r = subprocess.Popen(['whois', domain], stdout=subprocess.PIPE) + text = r.stdout.read() + except OSError: + # try experimental client + nic_client = NICClient() + text = nic_client.whois_lookup(None, domain, 0) + return WhoisEntry.load(domain, text) + + +def extract_domain(url): + """Extract the domain from the given URL + + >>> extract_domain('http://www.google.com.au/tos.html') + 'google.com.au' + >>> extract_domain('http://blog.webscraping.com') + 'webscraping.com' + >>> extract_domain('69.59.196.211') + 'stackoverflow.com' + """ + if re.match(r'\d+.\d+.\d+.\d+', url): + # this is an IP address + return socket.gethostbyaddr(url)[0] + + suffixes = 'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi', 'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec', 'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is', 'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo', 'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy', 'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn', 'ye', 'yt', 'za', 'zm', 'zw' + url = re.sub('^.*://', '', url).split('/')[0].lower() + domain = [] + for section in url.split('.'): + if section in suffixes: + domain.append(section) + else: + domain = [section] + return '.'.join(domain) + + +if __name__ == '__main__': + try: + url = sys.argv[1] + except IndexError: + print('Usage: %s url' % sys.argv[0]) + else: + print(whois(url)) diff -r 5083c26d8f93 -r c57439b500cb whois/parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/whois/parser.py Tue Jan 08 18:23:18 2013 +1100 @@ -0,0 +1,451 @@ +# parser.py - Module for parsing whois response data +# Copyright (c) 2008 Andrey Petrov +# +# This module is part of pywhois and is released under +# the MIT license: http://www.opensource.org/licenses/mit-license.php + +import re +from datetime import datetime + + +class PywhoisError(Exception): + pass + + +def cast_date(s): + """Convert any date string found in WHOIS to a datetime object. + """ + known_formats = [ + '%d-%b-%Y', # 02-jan-2000 + '%Y-%m-%d', # 2000-01-02 + '%d.%m.%Y', # 2000-01-02 + '%Y.%m.%d', # 2000.01.02 + '%Y/%m/%d', # 2000/01/02 + '%d-%b-%Y %H:%M:%S %Z', # 24-Jul-2009 13:20:03 UTC + '%a %b %d %H:%M:%S %Z %Y', # Tue Jun 21 23:59:59 GMT 2011 + '%Y-%m-%dT%H:%M:%SZ', # 2007-01-26T19:10:31Z + ] + + for known_format in known_formats: + try: + return datetime.strptime(s.strip(), known_format) + except ValueError as e: + pass # Wrong format, keep trying + return s + + +class WhoisEntry(object): + """Base class for parsing a Whois entries. + """ + # regular expressions to extract domain data from whois profile + # child classes will override this + _regex = { + 'domain_name': 'Domain Name:\s?(.+)', + 'registrar': 'Registrar:\s?(.+)', + 'whois_server': 'Whois Server:\s?(.+)', + 'referral_url': 'Referral URL:\s?(.+)', # http url of whois_server + 'updated_date': 'Updated Date:\s?(.+)', + 'creation_date': 'Creation Date:\s?(.+)', + 'expiration_date': 'Expiration Date:\s?(.+)', + 'name_servers': 'Name Server:\s?(.+)', # list of name servers + 'status': 'Status:\s?(.+)', # list of statuses + 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses + } + + def __init__(self, domain, text, regex=None): + self.domain = domain + self.text = text + if regex is not None: + self._regex = regex + + + def __getattr__(self, attr): + """The first time an attribute is called it will be calculated here. + The attribute is then set to be accessed directly by subsequent calls. + """ + whois_regex = self._regex.get(attr) + if whois_regex: + values = re.findall(whois_regex, self.text, re.IGNORECASE) + # try casting to date format + values = [cast_date(value.strip()) for value in values] + if len(values) == 1: + values = values[0] + setattr(self, attr, values) + return getattr(self, attr) + else: + raise KeyError('Unknown attribute: %s' % attr) + + def __str__(self): + """Print all whois properties of domain + """ + return '\n'.join('%s: %s' % (attr, str(getattr(self, attr))) for attr in self.attrs()) + + + def attrs(self): + """Return list of attributes that can be extracted for this domain + """ + return sorted(self._regex.keys()) + + + @staticmethod + def load(domain, text): + """Given whois output in ``text``, return an instance of ``WhoisEntry`` that represents its parsed contents. + """ + if text.strip() == 'No whois server is known for this kind of object.': + raise PywhoisError(text) + + if domain.endswith('.com'): + return WhoisCom(domain, text) + elif domain.endswith('.net'): + return WhoisNet(domain, text) + elif domain.endswith('.org'): + return WhoisOrg(domain, text) + elif domain.endswith('.name'): + return WhoisName(domain, text) + elif domain.endswith('.me'): + return WhoisMe(domain, text) + elif domain.endswith('.ru'): + return WhoisRu(domain, text) + elif domain.endswith('.us'): + return WhoisUs(domain, text) + elif domain.endswith('.uk'): + return WhoisUk(domain, text) + elif domain.endswith('.fr'): + return WhoisFr(domain, text) + elif domain.endswith('.fi'): + return WhoisFi(domain, text) + elif domain.endswith('.jp'): + return WhoisJp(domain, text) + elif domain.endswith('.pl'): + return WhoisPl(domain, text) + else: + return WhoisEntry(domain, text) + + + +class WhoisCom(WhoisEntry): + """Whois parser for .com domains + """ + def __init__(self, domain, text): + if 'No match for "' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text) + + +class WhoisNet(WhoisEntry): + """Whois parser for .net domains + """ + def __init__(self, domain, text): + if 'No match for "' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text) + + +class WhoisOrg(WhoisEntry): + """Whois parser for .org domains + """ + def __init__(self, domain, text): + if text.strip() == 'NOT FOUND': + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text) + + +class WhoisRu(WhoisEntry): + """Whois parser for .ru domains + """ + regex = { + 'domain_name': 'domain:\s*(.+)', + 'registrar': 'registrar:\s*(.+)', + 'creation_date': 'created:\s*(.+)', + 'expiration_date': 'paid-till:\s*(.+)', + 'name_servers': 'nserver:\s*(.+)', # list of name servers + 'status': 'state:\s*(.+)', # list of statuses + 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses + } + + def __init__(self, domain, text): + if text.strip() == 'No entries found': + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisName(WhoisEntry): + """Whois parser for .name domains + """ + regex = { + 'domain_name_id': 'Domain Name ID:\s*(.+)', + 'domain_name': 'Domain Name:\s*(.+)', + 'registrar_id': 'Sponsoring Registrar ID:\s*(.+)', + 'registrar': 'Sponsoring Registrar:\s*(.+)', + 'registrant_id': 'Registrant ID:\s*(.+)', + 'admin_id': 'Admin ID:\s*(.+)', + 'technical_id': 'Tech ID:\s*(.+)', + 'billing_id': 'Billing ID:\s*(.+)', + 'creation_date': 'Created On:\s*(.+)', + 'expiration_date': 'Expires On:\s*(.+)', + 'updated_date': 'Updated On:\s*(.+)', + 'name_server_ids': 'Name Server ID:\s*(.+)', # list of name server ids + 'name_servers': 'Name Server:\s*(.+)', # list of name servers + 'status': 'Domain Status:\s*(.+)', # list of statuses + } + def __init__(self, domain, text): + if 'No match.' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisUs(WhoisEntry): + """Whois parser for .us domains + """ + regex = { + 'domain_name': 'Domain Name:\s*(.+)', + 'domain__id': 'Domain ID:\s*(.+)', + 'registrar': 'Sponsoring Registrar:\s*(.+)', + 'registrar_id': 'Sponsoring Registrar IANA ID:\s*(.+)', + 'registrar_url': 'Registrar URL \(registration services\):\s*(.+)', + 'status': 'Domain Status:\s*(.+)', # list of statuses + 'registrant_id': 'Registrant ID:\s*(.+)', + 'registrant_name': 'Registrant Name:\s*(.+)', + 'registrant_address1': 'Registrant Address1:\s*(.+)', + 'registrant_address2': 'Registrant Address2:\s*(.+)', + 'registrant_city': 'Registrant City:\s*(.+)', + 'registrant_state_province': 'Registrant State/Province:\s*(.+)', + 'registrant_postal_code': 'Registrant Postal Code:\s*(.+)', + 'registrant_country': 'Registrant Country:\s*(.+)', + 'registrant_country_code': 'Registrant Country Code:\s*(.+)', + 'registrant_phone_number': 'Registrant Phone Number:\s*(.+)', + 'registrant_email': 'Registrant Email:\s*(.+)', + 'registrant_application_purpose': 'Registrant Application Purpose:\s*(.+)', + 'registrant_nexus_category': 'Registrant Nexus Category:\s*(.+)', + 'admin_id': 'Administrative Contact ID:\s*(.+)', + 'admin_name': 'Administrative Contact Name:\s*(.+)', + 'admin_address1': 'Administrative Contact Address1:\s*(.+)', + 'admin_address2': 'Administrative Contact Address2:\s*(.+)', + 'admin_city': 'Administrative Contact City:\s*(.+)', + 'admin_state_province': 'Administrative Contact State/Province:\s*(.+)', + 'admin_postal_code': 'Administrative Contact Postal Code:\s*(.+)', + 'admin_country': 'Administrative Contact Country:\s*(.+)', + 'admin_country_code': 'Administrative Contact Country Code:\s*(.+)', + 'admin_phone_number': 'Administrative Contact Phone Number:\s*(.+)', + 'admin_email': 'Administrative Contact Email:\s*(.+)', + 'admin_application_purpose': 'Administrative Application Purpose:\s*(.+)', + 'admin_nexus_category': 'Administrative Nexus Category:\s*(.+)', + 'billing_id': 'Billing Contact ID:\s*(.+)', + 'billing_name': 'Billing Contact Name:\s*(.+)', + 'billing_address1': 'Billing Contact Address1:\s*(.+)', + 'billing_address2': 'Billing Contact Address2:\s*(.+)', + 'billing_city': 'Billing Contact City:\s*(.+)', + 'billing_state_province': 'Billing Contact State/Province:\s*(.+)', + 'billing_postal_code': 'Billing Contact Postal Code:\s*(.+)', + 'billing_country': 'Billing Contact Country:\s*(.+)', + 'billing_country_code': 'Billing Contact Country Code:\s*(.+)', + 'billing_phone_number': 'Billing Contact Phone Number:\s*(.+)', + 'billing_email': 'Billing Contact Email:\s*(.+)', + 'billing_application_purpose': 'Billing Application Purpose:\s*(.+)', + 'billing_nexus_category': 'Billing Nexus Category:\s*(.+)', + 'tech_id': 'Technical Contact ID:\s*(.+)', + 'tech_name': 'Technical Contact Name:\s*(.+)', + 'tech_address1': 'Technical Contact Address1:\s*(.+)', + 'tech_address2': 'Technical Contact Address2:\s*(.+)', + 'tech_city': 'Technical Contact City:\s*(.+)', + 'tech_state_province': 'Technical Contact State/Province:\s*(.+)', + 'tech_postal_code': 'Technical Contact Postal Code:\s*(.+)', + 'tech_country': 'Technical Contact Country:\s*(.+)', + 'tech_country_code': 'Technical Contact Country Code:\s*(.+)', + 'tech_phone_number': 'Technical Contact Phone Number:\s*(.+)', + 'tech_email': 'Technical Contact Email:\s*(.+)', + 'tech_application_purpose': 'Technical Application Purpose:\s*(.+)', + 'tech_nexus_category': 'Technical Nexus Category:\s*(.+)', + 'name_servers': 'Name Server:\s*(.+)', # list of name servers + 'created_by_registrar': 'Created by Registrar:\s*(.+)', + 'last_updated_by_registrar': 'Last Updated by Registrar:\s*(.+)', + 'creation_date': 'Domain Registration Date:\s*(.+)', + 'expiration_date': 'Domain Expiration Date:\s*(.+)', + 'updated_date': 'Domain Last Updated Date:\s*(.+)', + } + def __init__(self, domain, text): + if 'Not found:' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisPl(WhoisEntry): + """Whois parser for .uk domains + """ + regex = { + 'domain_name': 'DOMAIN NAME:\s*(.+)\n', + 'registrar': 'REGISTRAR:\n\s*(.+)', + 'registrar_url': 'URL:\s*(.+)', # not available + 'status': 'Registration status:\n\s*(.+)', # not available + 'registrant_name': 'Registrant:\n\s*(.+)', # not available + 'creation_date': 'created:\s*(.+)\n', + 'expiration_date': 'renewal date:\s*(.+)', + 'updated_date': 'last modified:\s*(.+)\n', + } + def __init__(self, domain, text): + if 'Not found:' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisMe(WhoisEntry): + """Whois parser for .me domains + """ + regex = { + 'domain_id': 'Domain ID:(.+)', + 'domain_name': 'Domain Name:(.+)', + 'creation_date': 'Domain Create Date:(.+)', + 'updated_date': 'Domain Last Updated Date:(.+)', + 'expiration_date': 'Domain Expiration Date:(.+)', + 'transfer_date': 'Last Transferred Date:(.+)', + 'trademark_name': 'Trademark Name:(.+)', + 'trademark_country': 'Trademark Country:(.+)', + 'trademark_number': 'Trademark Number:(.+)', + 'trademark_application_date': 'Date Trademark Applied For:(.+)', + 'trademark_registration_date': 'Date Trademark Registered:(.+)', + 'registrar': 'Sponsoring Registrar:(.+)', + 'created_by': 'Created by:(.+)', + 'updated_by': 'Last Updated by Registrar:(.+)', + 'status': 'Domain Status:(.+)', # list of statuses + 'registrant_id': 'Registrant ID:(.+)', + 'registrant_name': 'Registrant Name:(.+)', + 'registrant_org': 'Registrant Organization:(.+)', + 'registrant_address': 'Registrant Address:(.+)', + 'registrant_address2': 'Registrant Address2:(.+)', + 'registrant_address3': 'Registrant Address3:(.+)', + 'registrant_city': 'Registrant City:(.+)', + 'registrant_state_province': 'Registrant State/Province:(.+)', + 'registrant_country': 'Registrant Country/Economy:(.+)', + 'registrant_postal_code': 'Registrant Postal Code:(.+)', + 'registrant_phone': 'Registrant Phone:(.+)', + 'registrant_phone_ext': 'Registrant Phone Ext\.:(.+)', + 'registrant_fax': 'Registrant FAX:(.+)', + 'registrant_fax_ext': 'Registrant FAX Ext\.:(.+)', + 'registrant_email': 'Registrant E-mail:(.+)', + 'admin_id': 'Admin ID:(.+)', + 'admin_name': 'Admin Name:(.+)', + 'admin_org': 'Admin Organization:(.+)', + 'admin_address': 'Admin Address:(.+)', + 'admin_address2': 'Admin Address2:(.+)', + 'admin_address3': 'Admin Address3:(.+)', + 'admin_city': 'Admin City:(.+)', + 'admin_state_province': 'Admin State/Province:(.+)', + 'admin_country': 'Admin Country/Economy:(.+)', + 'admin_postal_code': 'Admin Postal Code:(.+)', + 'admin_phone': 'Admin Phone:(.+)', + 'admin_phone_ext': 'Admin Phone Ext\.:(.+)', + 'admin_fax': 'Admin FAX:(.+)', + 'admin_fax_ext': 'Admin FAX Ext\.:(.+)', + 'admin_email': 'Admin E-mail:(.+)', + 'tech_id': 'Tech ID:(.+)', + 'tech_name': 'Tech Name:(.+)', + 'tech_org': 'Tech Organization:(.+)', + 'tech_address': 'Tech Address:(.+)', + 'tech_address2': 'Tech Address2:(.+)', + 'tech_address3': 'Tech Address3:(.+)', + 'tech_city': 'Tech City:(.+)', + 'tech_state_province': 'Tech State/Province:(.+)', + 'tech_country': 'Tech Country/Economy:(.+)', + 'tech_postal_code': 'Tech Postal Code:(.+)', + 'tech_phone': 'Tech Phone:(.+)', + 'tech_phone_ext': 'Tech Phone Ext\.:(.+)', + 'tech_fax': 'Tech FAX:(.+)', + 'tech_fax_ext': 'Tech FAX Ext\.:(.+)', + 'tech_email': 'Tech E-mail:(.+)', + 'name_servers': 'Nameservers:(.+)', # list of name servers + } + def __init__(self, domain, text): + if 'NOT FOUND' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisUk(WhoisEntry): + """Whois parser for .uk domains + """ + regex = { + 'domain_name': 'Domain name:\n\s*(.+)', + 'registrar': 'Registrar:\n\s*(.+)', + 'registrar_url': 'URL:\s*(.+)', + 'status': 'Registration status:\n\s*(.+)', # list of statuses + 'registrant_name': 'Registrant:\n\s*(.+)', + 'creation_date': 'Registered on:\s*(.+)', + 'expiration_date': 'Expiry date:\s*(.+)', + 'updated_date': 'Last updated:\s*(.+)', + 'name_servers': 'Name servers:\s*(.+)', + } + def __init__(self, domain, text): + if 'Not found:' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisFr(WhoisEntry): + """Whois parser for .fr domains + """ + regex = { + 'domain_name': 'domain:\s*(.+)', + 'registrar': 'registrar:\s*(.+)', + 'creation_date': 'created:\s*(.+)', + 'expiration_date': 'anniversary:\s*(.+)', + 'name_servers': 'nserver:\s*(.+)', # list of name servers + 'status': 'status:\s*(.+)', # list of statuses + 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses + 'updated_date': 'last-update:\s*(.+)', + } + + def __init__(self, domain, text): + if text.strip() == 'No entries found': + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisFi(WhoisEntry): + """Whois parser for .fi domains + """ + regex = { + 'domain_name': 'domain:\s*([\S]+)', + 'registrant_name': 'descr:\s*([\S\ ]+)', + 'registrant_address': 'address:\s*([\S\ ]+)', + 'registrant_phone': 'phone:\s*([\S\ ]+)', + 'status': 'status:\s*([\S]+)', # list of statuses + 'creation_date': 'created:\s*([\S]+)', + 'updated_date': 'modified:\s*([\S]+)', + 'expiration_date': 'expires:\s*([\S]+)', + 'name_servers': 'nserver:\s*([\S]+) \[(\S+)\]', # list of name servers + 'dnssec': 'dnssec:\s*([\S]+)', # list of name servers + } + def __init__(self, domain, text): + if 'Not found:' in text: + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) + + +class WhoisJp(WhoisEntry): + """Whois parser for .jp domains + """ + regex = { + 'domain_name': 'a\. \[Domain Name\]\s*(.+)', + 'registrant_org': 'g\. \[Organization\](.+)', + 'creation_date': r'\[Registered Date\]\s*(.+)', + 'name_servers': 'p\. \[Name Server\]\s*(.+)', # list of name servers + 'updated_date': '\[Last Update\]\s?(.+)', + 'status': '\[State\]\s*(.+)', # list of statuses + } + + def __init__(self, domain, text): + if text.strip() == 'No entries found': + raise PywhoisError(text) + else: + WhoisEntry.__init__(self, domain, text, self.regex) diff -r 5083c26d8f93 -r c57439b500cb whois/whois.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/whois/whois.py Tue Jan 08 18:23:18 2013 +1100 @@ -0,0 +1,273 @@ +""" +Whois client for python + +transliteration of: +http://www.opensource.apple.com/source/adv_cmds/adv_cmds-138.1/whois/whois.c + +Copyright (c) 2010 Chris Wolf + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + + Last edited by: $Author$ + on: $DateTime$ + Revision: $Revision$ + Id: $Id$ + Author: Chris Wolf +""" +import sys +import socket +import optparse +#import pdb + +def enforce_ascii(a): + if isinstance(a, str) or isinstance(a, unicode): + # return a.encode('ascii', 'replace') + r = "" + for i in a: + if ord(i) >= 128: + r += "?" + else: + r += i + return r + else: + return a + +class NICClient(object) : + + ABUSEHOST = "whois.abuse.net" + NICHOST = "whois.crsnic.net" + INICHOST = "whois.networksolutions.com" + DNICHOST = "whois.nic.mil" + GNICHOST = "whois.nic.gov" + ANICHOST = "whois.arin.net" + LNICHOST = "whois.lacnic.net" + RNICHOST = "whois.ripe.net" + PNICHOST = "whois.apnic.net" + MNICHOST = "whois.ra.net" + QNICHOST_TAIL = ".whois-servers.net" + SNICHOST = "whois.6bone.net" + BNICHOST = "whois.registro.br" + NORIDHOST = "whois.norid.no" + IANAHOST = "whois.iana.org" + DENICHOST = "de.whois-servers.net" + DEFAULT_PORT = "nicname" + WHOIS_SERVER_ID = "Whois Server:" + WHOIS_ORG_SERVER_ID = "Registrant Street1:Whois Server:" + + + WHOIS_RECURSE = 0x01 + WHOIS_QUICK = 0x02 + + ip_whois = [ LNICHOST, RNICHOST, PNICHOST, BNICHOST ] + + def __init__(self) : + self.use_qnichost = False + + def findwhois_server(self, buf, hostname): + """Search the initial TLD lookup results for the regional-specifc + whois server for getting contact details. + """ + #print 'finding whois server' + #print 'parameters:', buf, 'hostname', hostname + nhost = None + parts_index = 1 + start = buf.find(NICClient.WHOIS_SERVER_ID) + #print 'start', start + if (start == -1): + start = buf.find(NICClient.WHOIS_ORG_SERVER_ID) + parts_index = 2 + + if (start > -1): + end = buf[start:].find('\n') + #print 'end:', end + whois_line = buf[start:end+start] + #print 'whois_line', whois_line + nhost = whois_line.split(NICClient.WHOIS_SERVER_ID+' ').pop() + nhost = nhost.split('http://').pop() + #if the whois address is domain.tld/something then + #s.connect((hostname, 43)) does not work + if nhost.count('/') > 0: + nhost = None + #print 'nhost:',nhost + elif (hostname == NICClient.ANICHOST): + for nichost in NICClient.ip_whois: + if (buf.find(nichost) != -1): + nhost = nichost + break + return nhost + + def whois(self, query, hostname, flags): + """Perform initial lookup with TLD whois server + then, if the quick flag is false, search that result + for the region-specifc whois server and do a lookup + there for contact details + """ + #print 'Performing the whois' + #print 'parameters given:', query, hostname, flags + #pdb.set_trace() + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((hostname, 43)) + """send takes bytes as an input + """ + queryBytes = None + if (hostname == NICClient.DENICHOST): + #print 'the domain is in NIC DENIC' + queryBytes = ("-T dn,ace -C UTF-8 " + query + "\r\n").encode() + #print 'queryBytes:', queryBytes + else: + queryBytes = (query + "\r\n").encode() + s.send(queryBytes) + """recv returns bytes + """ + #print s + response = b'' + while True: + d = s.recv(4096) + response += d + if not d: + break + s.close() + #pdb.set_trace() + nhost = None + #print 'response', response + response = enforce_ascii(response) + if (flags & NICClient.WHOIS_RECURSE and nhost == None): + #print 'Inside first if' + nhost = self.findwhois_server(response.decode(), hostname) + #print 'nhost is:', nhost + if (nhost != None): + #print 'inside second if' + response += self.whois(query, nhost, 0) + #print 'response', response + #print 'returning whois response' + return response.decode() + + def choose_server(self, domain): + """Choose initial lookup NIC host""" + if (domain.endswith("-NORID")): + return NICClient.NORIDHOST + pos = domain.rfind('.') + if (pos == -1): + return None + tld = domain[pos+1:] + if (tld[0].isdigit()): + return NICClient.ANICHOST + + return tld + NICClient.QNICHOST_TAIL + + def whois_lookup(self, options, query_arg, flags): + """Main entry point: Perform initial lookup on TLD whois server, + or other server to get region-specific whois server, then if quick + flag is false, perform a second lookup on the region-specific + server for contact records""" + #print 'whois_lookup' + nichost = None + #pdb.set_trace() + # this would be the case when this function is called by other than main + if (options == None): + options = {} + + if ( (not 'whoishost' in options or options['whoishost'] == None) + and (not 'country' in options or options['country'] == None)): + self.use_qnichost = True + options['whoishost'] = NICClient.NICHOST + if ( not (flags & NICClient.WHOIS_QUICK)): + flags |= NICClient.WHOIS_RECURSE + + if ('country' in options and options['country'] != None): + result = self.whois(query_arg, options['country'] + NICClient.QNICHOST_TAIL, flags) + elif (self.use_qnichost): + nichost = self.choose_server(query_arg) + if (nichost != None): + result = self.whois(query_arg, nichost, flags) + else: + result = self.whois(query_arg, options['whoishost'], flags) + #print 'whois_lookup finished' + return result +#---- END OF NICClient class def --------------------- + +def parse_command_line(argv): + """Options handling mostly follows the UNIX whois(1) man page, except + long-form options can also be used. + """ + flags = 0 + + usage = "usage: %prog [options] name" + + parser = optparse.OptionParser(add_help_option=False, usage=usage) + parser.add_option("-a", "--arin", action="store_const", + const=NICClient.ANICHOST, dest="whoishost", + help="Lookup using host " + NICClient.ANICHOST) + parser.add_option("-A", "--apnic", action="store_const", + const=NICClient.PNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.PNICHOST) + parser.add_option("-b", "--abuse", action="store_const", + const=NICClient.ABUSEHOST, dest="whoishost", + help="Lookup using host " + NICClient.ABUSEHOST) + parser.add_option("-c", "--country", action="store", + type="string", dest="country", + help="Lookup using country-specific NIC") + parser.add_option("-d", "--mil", action="store_const", + const=NICClient.DNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.DNICHOST) + parser.add_option("-g", "--gov", action="store_const", + const=NICClient.GNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.GNICHOST) + parser.add_option("-h", "--host", action="store", + type="string", dest="whoishost", + help="Lookup using specified whois host") + parser.add_option("-i", "--nws", action="store_const", + const=NICClient.INICHOST, dest="whoishost", + help="Lookup using host " + NICClient.INICHOST) + parser.add_option("-I", "--iana", action="store_const", + const=NICClient.IANAHOST, dest="whoishost", + help="Lookup using host " + NICClient.IANAHOST) + parser.add_option("-l", "--lcanic", action="store_const", + const=NICClient.LNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.LNICHOST) + parser.add_option("-m", "--ra", action="store_const", + const=NICClient.MNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.MNICHOST) + parser.add_option("-p", "--port", action="store", + type="int", dest="port", + help="Lookup using specified tcp port") + parser.add_option("-Q", "--quick", action="store_true", + dest="b_quicklookup", + help="Perform quick lookup") + parser.add_option("-r", "--ripe", action="store_const", + const=NICClient.RNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.RNICHOST) + parser.add_option("-R", "--ru", action="store_const", + const="ru", dest="country", + help="Lookup Russian NIC") + parser.add_option("-6", "--6bone", action="store_const", + const=NICClient.SNICHOST, dest="whoishost", + help="Lookup using host " + NICClient.SNICHOST) + parser.add_option("-?", "--help", action="help") + + + return parser.parse_args(argv) + +if __name__ == "__main__": + flags = 0 + nic_client = NICClient() + (options, args) = parse_command_line(sys.argv) + if (options.b_quicklookup is True): + flags = flags|NICClient.WHOIS_QUICK + print(nic_client.whois_lookup(options.__dict__, args[1], flags))