# HG changeset patch # User Richard Penman # Date 1458137916 -3600 # Node ID 359baebcf0e83ec5cd5a3a2f8915535f5b58114e # Parent fa9650e9ec23061db70a8f50cd127cb931b7af96# Parent 95f170b4fd131a629a95f6054157587a8adab660 merged diff -r fa9650e9ec23 -r 359baebcf0e8 README.rst --- a/README.rst Wed Mar 16 15:08:15 2016 +0100 +++ b/README.rst Wed Mar 16 15:18:36 2016 +0100 @@ -6,7 +6,7 @@ - Able to extract data for all the popular TLDs (com, org, net, ...) - Query a WHOIS server directly instead of going through an intermediate web service like many others do. -- Works with Python 2.4+ and no external dependencies +- Works with Python 2 & 3 @@ -37,21 +37,34 @@ Install from pypi: -.. sourcecode:: python +.. sourcecode:: bash pip install python-whois Or checkout latest version from repository: -.. sourcecode:: python +.. sourcecode:: bash hg clone https://bitbucket.org/richardpenman/pywhois +Note that then you will need to manually install the future module, which allows supporting both Python 2 & 3: + + +.. 
sourcecode:: bash + + pip install future + + Changelog ========= +0.6 - 2015-03-02: + +* support added for python 3 +* updated TLD list + 0.5 - 2015-09-05: * added native client, which now handles whois requests by default diff -r fa9650e9ec23 -r 359baebcf0e8 setup.py --- a/setup.py Wed Mar 16 15:08:15 2016 +0100 +++ b/setup.py Wed Mar 16 15:18:36 2016 +0100 @@ -1,13 +1,16 @@ import sys, os import setuptools -version = '0.5.2' +version = '0.6.1' setuptools.setup( name='python-whois', version=version, description="Whois querying and parsing of domain registration information.", long_description='', + install_requires=[ + 'future', + ], classifiers=[ 'Environment :: Web Environment', 'Intended Audience :: Developers', diff -r fa9650e9ec23 -r 359baebcf0e8 test/test_main.py --- a/test/test_main.py Wed Mar 16 15:08:15 2016 +0100 +++ b/test/test_main.py Wed Mar 16 15:18:36 2016 +0100 @@ -1,5 +1,12 @@ # coding=utf-8 +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * import unittest from whois import extract_domain diff -r fa9650e9ec23 -r 359baebcf0e8 test/test_nicclient.py --- a/test/test_nicclient.py Wed Mar 16 15:08:15 2016 +0100 +++ b/test/test_nicclient.py Wed Mar 16 15:18:36 2016 +0100 @@ -1,5 +1,12 @@ # coding=utf-8 +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * import unittest from whois.whois import NICClient diff -r fa9650e9ec23 -r 359baebcf0e8 test/test_parser.py --- a/test/test_parser.py Wed Mar 16 15:08:15 2016 +0100 +++ b/test/test_parser.py Wed Mar 16 15:18:36 2016 +0100 @@ -1,3 +1,10 @@ +from __future__ import print_function +from __future__ import 
unicode_literals +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * import unittest import os @@ -69,7 +76,7 @@ result = results.get(key) expected = expected_results.get(key) if expected != result: - print "%s \t(%s):\t %s != %s" % (domain, key, result, expected) + print("%s \t(%s):\t %s != %s" % (domain, key, result, expected)) fail += 1 if fail: diff -r fa9650e9ec23 -r 359baebcf0e8 whois/__init__.py --- a/whois/__init__.py Wed Mar 16 15:08:15 2016 +0100 +++ b/whois/__init__.py Wed Mar 16 15:18:36 2016 +0100 @@ -1,10 +1,17 @@ +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals +from __future__ import division +from future import standard_library +standard_library.install_aliases() +from builtins import * import re import sys import os import subprocess import socket -from parser import WhoisEntry -from whois import NICClient +from .parser import WhoisEntry +from .whois import NICClient def whois(url, command=False): @@ -45,28 +52,28 @@ tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt') suffixes = [ - line.lower().strip() + line.lower().strip().encode('utf-8') for line in open(tlds_path).readlines() if not line.startswith('#') ] - if type(url) is not unicode: + if not isinstance(url, str): url = url.decode('utf-8') - url = re.sub('^.*://', '', url.encode('idna')).split('/')[0].lower() + url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower() domain = [] - for section in url.split('.'): + for section in url.split(b'.'): if section in suffixes: domain.append(section) else: domain = [section] - return '.'.join(domain).decode('idna').encode('utf-8') + return b'.'.join(domain).decode('idna') if __name__ == '__main__': try: url = sys.argv[1] except IndexError: - print 'Usage: %s url' % sys.argv[0] + print('Usage: %s url' % sys.argv[0]) 
else: - print whois(url) + print(whois(url)) diff -r fa9650e9ec23 -r 359baebcf0e8 whois/data/tlds.txt --- a/whois/data/tlds.txt Wed Mar 16 15:08:15 2016 +0100 +++ b/whois/data/tlds.txt Wed Mar 16 15:18:36 2016 +0100 @@ -1,26 +1,51 @@ -# Version 2014081200, Last Updated Tue Aug 12 07:07:01 2014 UTC +# Version 2016011500, Last Updated Fri Jan 15 07:07:01 2016 UTC +AAA +AARP +ABB +ABBOTT +ABOGADO AC ACADEMY +ACCENTURE +ACCOUNTANT ACCOUNTANTS +ACO ACTIVE ACTOR AD +ADS +ADULT AE +AEG AERO AF +AFL AG AGENCY AI +AIG AIRFORCE +AIRTEL AL +ALLFINANZ +ALSACE AM -AN +AMICA +AMSTERDAM +ANALYTICS +ANDROID AO +APARTMENTS +APP +APPLE AQ +AQUARELLE AR +ARAMCO ARCHI ARMY ARPA +ARTE AS ASIA ASSOCIATES @@ -28,45 +53,89 @@ ATTORNEY AU AUCTION +AUDI AUDIO +AUTHOR +AUTO AUTOS AW AX AXA AZ +AZURE BA +BAIDU +BAND +BANK BAR +BARCELONA +BARCLAYCARD +BARCLAYS BARGAINS +BAUHAUS BAYERN BB +BBC +BBVA +BCN BD BE +BEATS BEER +BENTLEY BERLIN BEST +BET BF BG BH +BHARTI BI +BIBLE BID BIKE +BING +BINGO BIO BIZ BJ BLACK BLACKFRIDAY +BLOOMBERG BLUE BM +BMS BMW BN +BNL +BNPPARIBAS BO +BOATS +BOEHRINGER +BOM +BOND +BOO +BOOK +BOOTS +BOSCH +BOSTIK +BOT BOUTIQUE BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER BRUSSELS BS BT +BUDAPEST +BUGATTI BUILD BUILDERS +BUSINESS +BUY BUZZ BV BW @@ -75,61 +144,103 @@ BZH CA CAB +CAFE +CAL +CALL CAMERA CAMP CANCERRESEARCH +CANON CAPETOWN CAPITAL +CAR +CARAVAN CARDS CARE CAREER CAREERS +CARS +CARTIER +CASA CASH +CASINO CAT CATERING +CBA +CBN CC CD +CEB CENTER CEO +CERN CF +CFA +CFD CG CH +CHANEL +CHANNEL +CHAT CHEAP +CHLOE CHRISTMAS +CHROME CHURCH CI +CIPRIANI +CIRCLE +CISCO CITIC CITY +CITYEATS CK CL CLAIMS CLEANING +CLICK CLINIC +CLINIQUE CLOTHING +CLOUD CLUB +CLUBMED CM CN CO +COACH CODES COFFEE COLLEGE COLOGNE COM +COMMBANK COMMUNITY COMPANY +COMPARE COMPUTER +COMSEC CONDOS CONSTRUCTION CONSULTING +CONTACT CONTRACTORS COOKING COOL COOP +CORSICA COUNTRY +COUPONS +COURSES CR CREDIT CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS CRUISES +CSC CU CUISINELLA CV @@ 
-137,17 +248,31 @@ CX CY CYMRU +CYOU CZ +DABUR +DAD DANCE +DATE DATING +DATSUN +DAY +DCLK DE +DEALER DEALS DEGREE +DELIVERY +DELL +DELTA DEMOCRAT DENTAL DENTIST DESI +DESIGN +DEV DIAMONDS +DIET DIGITAL DIRECT DIRECTORY @@ -157,277 +282,494 @@ DM DNP DO +DOCS +DOG +DOHA DOMAINS +DOOSAN +DOWNLOAD +DRIVE +DUBAI DURBAN +DVAG DZ +EARTH +EAT EC EDU EDUCATION EE EG EMAIL +EMERCK +ENERGY ENGINEER ENGINEERING ENTERPRISES +EPSON EQUIPMENT ER +ERNI ES +ESQ ESTATE ET EU +EUROVISION EUS EVENTS +EVERBANK EXCHANGE EXPERT EXPOSED +EXPRESS +FAGE FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS FARM +FASHION +FAST FEEDBACK +FERRERO FI +FILM +FINAL FINANCE FINANCIAL +FIRESTONE +FIRMDALE FISH FISHING +FIT FITNESS FJ FK FLIGHTS FLORIST +FLOWERS +FLSMIDTH +FLY FM FO FOO +FOOTBALL +FORD +FOREX +FORSALE +FORUM FOUNDATION +FOX FR +FRESENIUS +FRL FROGANS FUND FURNITURE FUTBOL +FYI GA GAL GALLERY +GAME +GARDEN GB +GBIZ GD +GDN GE +GEA GENT +GENTING GF GG +GGEE GH GI GIFT GIFTS GIVES +GIVING GL GLASS +GLE GLOBAL GLOBO GM +GMAIL GMO +GMX GN +GOLD +GOLDPOINT +GOLF +GOO +GOOG +GOOGLE GOP +GOT GOV GP GQ GR +GRAINGER GRAPHICS GRATIS GREEN GRIPE +GROUP GS GT GU +GUCCI +GUGE GUIDE GUITARS GURU GW GY HAMBURG +HANGOUT HAUS HEALTHCARE +HELP +HERE +HERMES HIPHOP +HITACHI HIV HK HM HN +HOCKEY HOLDINGS HOLIDAY +HOMEDEPOT HOMES +HONDA HORSE HOST +HOSTING +HOTELES +HOTMAIL HOUSE +HOW HR +HSBC HT HU +HYUNDAI +IBM +ICBC +ICE +ICU ID IE +IFM +IINET IL IM +IMMO IMMOBILIEN IN INDUSTRIES +INFINITI INFO +ING INK INSTITUTE +INSURANCE INSURE INT INTERNATIONAL INVESTMENTS IO +IPIRANGA IQ IR +IRISH IS +ISELECT +IST +ISTANBUL IT +ITAU +IWC +JAGUAR +JAVA +JCB JE JETZT +JEWELRY +JLC +JLL JM +JMP JO JOBS JOBURG +JOT +JOY JP +JPRS JUEGOS KAUFEN +KDDI KE +KFH KG KH KI +KIA KIM +KINDER KITCHEN KIWI KM KN KOELN +KOMATSU KP +KPN KR KRD KRED KW KY +KYOTO KZ LA LACAIXA +LAMBORGHINI +LAMER +LANCASTER LAND +LANDROVER +LASALLE +LAT +LATROBE +LAW LAWYER LB LC +LDS LEASE +LECLERC +LEGAL +LEXUS LGBT LI +LIAISON +LIDL LIFE +LIFESTYLE LIGHTING 
+LIKE LIMITED LIMO +LINCOLN +LINDE LINK +LIVE +LIVING +LIXIL LK +LOAN LOANS +LOL LONDON +LOTTE LOTTO +LOVE LR LS LT +LTD +LTDA LU +LUPIN LUXE LUXURY LV LY MA +MADRID +MAIF MAISON +MAKEUP +MAN MANAGEMENT MANGO MARKET MARKETING +MARKETS +MARRIOTT +MBA MC MD ME +MED MEDIA MEET MELBOURNE +MEME +MEMORIAL +MEN MENU +MEO MG MH MIAMI +MICROSOFT MIL MINI MK ML MM +MMA MN MO MOBI +MOBILY MODA MOE +MOI +MOM MONASH +MONEY +MONTBLANC +MORMON MORTGAGE MOSCOW MOTORCYCLES +MOV +MOVIE +MOVISTAR MP MQ MR MS MT +MTN +MTPC +MTR MU MUSEUM +MUTUELLE MV MW MX MY MZ NA +NADEX NAGOYA NAME NAVY NC NE +NEC NET +NETBANK +NETWORK NEUSTAR +NEW +NEWS +NEXUS NF NG NGO NHK NI +NICO NINJA +NISSAN NL NO +NOKIA +NORTON +NOWRUZ NP NR NRA NRW +NTT NU NYC NZ +OBI +OFFICE OKINAWA OM +OMEGA +ONE ONG ONL +ONLINE +OOO +ORACLE +ORANGE ORG ORGANIC +ORIGINS +OSAKA +OTSUKA OVH PA +PAGE +PANERAI PARIS +PARS PARTNERS PARTS +PARTY PE +PET PF PG PH +PHARMACY +PHILIPS PHOTO PHOTOGRAPHY PHOTOS PHYSIO +PIAGET PICS +PICTET PICTURES +PID +PIN +PING PINK +PIZZA PK PL PLACE +PLAY +PLAYSTATION PLUMBING +PLUS PM PN +POHL +POKER +PORN POST PR PRAXI PRESS PRO +PROD PRODUCTIONS +PROF +PROMO PROPERTIES +PROPERTY +PROTECTION PS PT PUB @@ -436,67 +778,142 @@ QA QPON QUEBEC +RACING RE +READ REALTOR +REALTY RECIPES RED +REDSTONE +REDUMBRELLA REHAB REISE REISEN +REIT REN +RENT RENTALS REPAIR REPORT REPUBLICAN REST RESTAURANT +REVIEW REVIEWS +REXROTH RICH +RICOH RIO +RIP RO +ROCHER ROCKS RODEO +ROOM RS +RSVP RU RUHR +RUN RW +RWE RYUKYU SA SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SAPO SARL +SAS +SAXO SB +SBS SC +SCA SCB +SCHAEFFLER SCHMIDT +SCHOLARSHIPS +SCHOOL SCHULE +SCHWARZ +SCIENCE +SCOR SCOT SD SE +SEAT +SECURITY +SEEK +SELECT +SENER SERVICES +SEVEN +SEW +SEX SEXY +SFR SG SH +SHARP +SHELL +SHIA SHIKSHA SHOES +SHOW +SHRIRAM SI SINGLES +SITE SJ SK +SKI +SKIN +SKY +SKYPE SL SM +SMILE SN +SNCF SO +SOCCER SOCIAL SOFTWARE SOHU SOLAR SOLUTIONS +SONY SOY SPACE SPIEGEL +SPREADBETTING SR 
+SRL ST +STADA +STAR +STARHUB +STATEFARM +STATOIL +STC +STCGROUP +STOCKHOLM +STORAGE +STUDIO +STUDY +STYLE SU +SUCKS SUPPLIES SUPPLY SUPPORT @@ -504,22 +921,41 @@ SURGERY SUZUKI SV +SWATCH +SWISS SX SY +SYDNEY +SYMANTEC SYSTEMS SZ +TAB +TAIPEI +TATAMOTORS TATAR TATTOO TAX +TAXI TC +TCI TD +TEAM +TECH TECHNOLOGY TEL +TELEFONICA +TEMASEK +TENNIS TF TG TH +THD +THEATER +THEATRE +TICKETS TIENDA TIPS +TIRES TIROL TJ TK @@ -531,65 +967,110 @@ TOKYO TOOLS TOP +TORAY +TOSHIBA +TOURS TOWN +TOYOTA TOYS -TP TR TRADE +TRADING TRAINING TRAVEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV TT +TUBE +TUI +TUSHU TV TW TZ UA +UBS UG UK UNIVERSITY UNO +UOL US UY UZ VA VACATIONS +VANA VC VE VEGAS VENTURES +VERISIGN VERSICHERUNG VET VG VI VIAJES +VIDEO VILLAS +VIN +VIP +VIRGIN VISION +VISTA +VISTAPRINT +VIVA VLAANDEREN VN VODKA +VOLKSWAGEN VOTE VOTING VOTO VOYAGE VU WALES +WALTER WANG +WANGGOU WATCH +WATCHES +WEATHER WEBCAM +WEBER WEBSITE WED +WEDDING +WEIR WF WHOSWHO WIEN WIKI WILLIAMHILL +WIN +WINDOWS +WINE +WME +WORK WORKS +WORLD WS WTC WTF +XBOX +XEROX +XIN +XN--11B4C3D +XN--1QQW23A +XN--30RR7Y XN--3BST00M XN--3DS443G XN--3E0B707E +XN--3PXU8K +XN--42C2D9A XN--45BRJ9C +XN--45Q11C XN--4GBRIM XN--55QW42G XN--55QX5D @@ -600,65 +1081,116 @@ XN--80ASEHDB XN--80ASWG XN--90A3AC +XN--90AIS +XN--9DBQ2A +XN--9ET52U +XN--B4W605FERD XN--C1AVG +XN--C2BR7G XN--CG4BKI XN--CLCHC0EA0B2G2A9GCD XN--CZR694B +XN--CZRS0T XN--CZRU2D XN--D1ACJ3B +XN--D1ALF +XN--ECKVDTC9D +XN--EFVY88H +XN--ESTV75G +XN--FHBEI XN--FIQ228C5HS XN--FIQ64B XN--FIQS8S XN--FIQZ9S +XN--FJQ720A +XN--FLW351E XN--FPCRJ9C3D XN--FZC2C9E2C XN--GECRJ9C XN--H2BRJ9C +XN--HXT814E XN--I1B6B1A6A2E +XN--IMR513N XN--IO0A7I +XN--J1AEF XN--J1AMH XN--J6W193G +XN--JLQ61U9W7B +XN--KCRX77D1X4A XN--KPRW13D XN--KPRY57D +XN--KPU716F XN--KPUT3I XN--L1ACC XN--LGBBAT1AD8J XN--MGB9AWBF +XN--MGBA3A3EJT XN--MGBA3A4F16A XN--MGBAAM7A8H XN--MGBAB2BD XN--MGBAYH7GPA +XN--MGBB9FBPOB XN--MGBBH1A71E XN--MGBC0A9AZCG XN--MGBERP4A5D4AR +XN--MGBPL2FH +XN--MGBT3DHD 
+XN--MGBTX2B XN--MGBX4CD0AB +XN--MK1BU44C +XN--MXTQ1M XN--NGBC5AZD +XN--NGBE9E0A +XN--NODE XN--NQV7F XN--NQV7FS00EMA +XN--NYQY26A XN--O3CW4H XN--OGBPF8FL +XN--P1ACF XN--P1AI +XN--PBT977C XN--PGBS0DH +XN--PSSY2U XN--Q9JYB4C +XN--QCKA1PMC +XN--QXAM XN--RHQV96G XN--S9BRJ9C XN--SES554G +XN--T60B56A +XN--TCKWE XN--UNUP4Y +XN--VERMGENSBERATER-CTB +XN--VERMGENSBERATUNG-PWB +XN--VHQUV +XN--VUQ861B XN--WGBH1C XN--WGBL6A +XN--XHQ521B XN--XKC2AL3HYE2A XN--XKC2DL3A5EE0H +XN--Y9A3AQ XN--YFRO4I67O XN--YGBI2AMMX XN--ZFR164B +XPERIA XXX XYZ YACHTS +YAMAXUN YANDEX YE +YODOBASHI +YOGA YOKOHAMA +YOUTUBE YT ZA +ZARA +ZERO +ZIP ZM ZONE +ZUERICH ZW diff -r fa9650e9ec23 -r 359baebcf0e8 whois/parser.py --- a/whois/parser.py Wed Mar 16 15:08:15 2016 +0100 +++ b/whois/parser.py Wed Mar 16 15:18:36 2016 +0100 @@ -5,17 +5,28 @@ # This module is part of pywhois and is released under # the MIT license: http://www.opensource.org/licenses/mit-license.php +from __future__ import absolute_import +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from future import standard_library +standard_library.install_aliases() +from builtins import * +from builtins import str +from past.builtins import basestring import json from datetime import datetime import re try: import dateutil.parser as dp - from time_zones import tz_data + from .time_zones import tz_data DATEUTIL = True except ImportError: DATEUTIL = False +EMAIL_REGEX = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?" 
+ KNOWN_FORMATS = [ '%d-%b-%Y', # 02-jan-2000 '%Y-%m-%d', # 2000-01-02 @@ -45,7 +56,7 @@ def datetime_parse(s): for known_format in KNOWN_FORMATS: try: - s = datetime.strptime(s.strip(), known_format) + s = datetime.strptime(s, known_format) break except ValueError as e: pass # Wrong format, keep trying @@ -57,7 +68,7 @@ if DATEUTIL: try: return dp.parse( - s.strip(), + s, tzinfos=tz_data, dayfirst=dayfirst, yearfirst=yearfirst @@ -74,24 +85,24 @@ # regular expressions to extract domain data from whois profile # child classes will override this _regex = { - 'domain_name': 'Domain Name:\s?(.+)', - 'registrar': 'Registrar:\s?(.+)', - 'whois_server': 'Whois Server:\s?(.+)', - 'referral_url': 'Referral URL:\s?(.+)', # http url of whois_server - 'updated_date': 'Updated Date:\s?(.+)', - 'creation_date': 'Creation Date:\s?(.+)', - 'expiration_date': 'Expir\w+ Date:\s?(.+)', - 'name_servers': 'Name Server:\s?(.+)', # list of name servers - 'status': 'Status:\s?(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email s - 'dnssec': 'dnssec:\s*([\S]+)', - 'name': 'Registrant Name:\s*(.+)', - 'org': 'Registrant\s*Organization:\s*(.+)', - 'address': 'Registrant Street:\s*(.+)', - 'city': 'Registrant City:\s*(.+)', - 'state': 'Registrant State/Province:\s*(.+)', - 'zipcode': 'Registrant Postal Code:\s*(.+)', - 'country': 'Registrant Country:\s*(.+)', + 'domain_name': 'Domain Name: *(.+)', + 'registrar': 'Registrar: *(.+)', + 'whois_server': 'Whois Server: *(.+)', + 'referral_url': 'Referral URL: *(.+)', # http url of whois_server + 'updated_date': 'Updated Date: *(.+)', + 'creation_date': 'Creation Date: *(.+)', + 'expiration_date': 'Expir\w+ Date: *(.+)', + 'name_servers': 'Name Server: *(.+)', # list of name servers + 'status': 'Status: *(.+)', # list of statuses + 'emails': EMAIL_REGEX, # list of email s + 'dnssec': 'dnssec: *([\S]+)', + 'name': 'Registrant Name: *(.+)', + 'org': 'Registrant\s*Organization: *(.+)', + 'address': 'Registrant Street: 
*(.+)', + 'city': 'Registrant City: *(.+)', + 'state': 'Registrant State/Province: *(.+)', + 'zipcode': 'Registrant Postal Code: *(.+)', + 'country': 'Registrant Country: *(.+)', } dayfirst = False yearfirst = False @@ -110,15 +121,17 @@ """The first time an attribute is called it will be calculated here. The attribute is then set to be accessed directly by subsequent calls. """ - for attr, regex in self._regex.items(): + for attr, regex in list(self._regex.items()): if regex: values = [] for value in re.findall(regex, self.text, re.IGNORECASE): - if isinstance(value, basestring): + value = value.strip() + if value and isinstance(value, basestring): # try casting to date format - value = cast_date(value.strip(), - dayfirst=self.dayfirst, - yearfirst=self.yearfirst) + value = cast_date( + value, + dayfirst=self.dayfirst, + yearfirst=self.yearfirst) if value and value not in values: # avoid duplicates values.append(value) @@ -237,16 +250,16 @@ """Whois parser for .org domains """ regex = { - 'domain_name': 'Domain Name:\s?(.+)', - 'registrar': 'Registrar:\s?(.+)', - 'whois_server': 'Whois Server:\s?(.+)', # empty usually - 'referral_url': 'Referral URL:\s?(.+)', # http url of whois_server: empty usually - 'updated_date': 'Updated Date:\s?(.+)', - 'creation_date': 'Creation Date:\s?(.+)', - 'expiration_date': 'Registry Expiry Date:\s?(.+)', - 'name_servers': 'Name Server:\s?(.+)', # list of name servers - 'status': 'Status:\s?(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses + 'domain_name': 'Domain Name: *(.+)', + 'registrar': 'Registrar: *(.+)', + 'whois_server': 'Whois Server: *(.+)', # empty usually + 'referral_url': 'Referral URL: *(.+)', # http url of whois_server: empty usually + 'updated_date': 'Updated Date: *(.+)', + 'creation_date': 'Creation Date: *(.+)', + 'expiration_date': 'Registry Expiry Date: *(.+)', + 'name_servers': 'Name Server: *(.+)', # list of name servers + 'status': 'Status: *(.+)', # list of 
statuses + 'emails': EMAIL_REGEX, # list of email addresses } def __init__(self, domain, text): @@ -260,14 +273,14 @@ """Whois parser for .ru domains """ regex = { - 'domain_name': 'domain:\s*(.+)', - 'registrar': 'registrar:\s*(.+)', - 'creation_date': 'created:\s*(.+)', - 'expiration_date': 'paid-till:\s*(.+)', - 'name_servers': 'nserver:\s*(.+)', # list of name servers - 'status': 'state:\s*(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses - 'org': 'org:\s*(.+)' + 'domain_name': 'domain: *(.+)', + 'registrar': 'registrar: *(.+)', + 'creation_date': 'created: *(.+)', + 'expiration_date': 'paid-till: *(.+)', + 'name_servers': 'nserver: *(.+)', # list of name servers + 'status': 'state: *(.+)', # list of statuses + 'emails': EMAIL_REGEX, # list of email addresses + 'org': 'org: *(.+)' } def __init__(self, domain, text): @@ -309,20 +322,20 @@ """Whois parser for .name domains """ regex = { - 'domain_name_id': 'Domain Name ID:\s*(.+)', - 'domain_name': 'Domain Name:\s*(.+)', - 'registrar_id': 'Sponsoring Registrar ID:\s*(.+)', - 'registrar': 'Sponsoring Registrar:\s*(.+)', - 'registrant_id': 'Registrant ID:\s*(.+)', - 'admin_id': 'Admin ID:\s*(.+)', - 'technical_id': 'Tech ID:\s*(.+)', - 'billing_id': 'Billing ID:\s*(.+)', - 'creation_date': 'Created On:\s*(.+)', - 'expiration_date': 'Expires On:\s*(.+)', - 'updated_date': 'Updated On:\s*(.+)', - 'name_server_ids': 'Name Server ID:\s*(.+)', # list of name server ids - 'name_servers': 'Name Server:\s*(.+)', # list of name servers - 'status': 'Domain Status:\s*(.+)', # list of statuses + 'domain_name_id': 'Domain Name ID: *(.+)', + 'domain_name': 'Domain Name: *(.+)', + 'registrar_id': 'Sponsoring Registrar ID: *(.+)', + 'registrar': 'Sponsoring Registrar: *(.+)', + 'registrant_id': 'Registrant ID: *(.+)', + 'admin_id': 'Admin ID: *(.+)', + 'technical_id': 'Tech ID: *(.+)', + 'billing_id': 'Billing ID: *(.+)', + 'creation_date': 'Created On: *(.+)', + 'expiration_date': 
'Expires On: *(.+)', + 'updated_date': 'Updated On: *(.+)', + 'name_server_ids': 'Name Server ID: *(.+)', # list of name server ids + 'name_servers': 'Name Server: *(.+)', # list of name servers + 'status': 'Domain Status: *(.+)', # list of statuses } def __init__(self, domain, text): @@ -336,70 +349,70 @@ """Whois parser for .us domains """ regex = { - 'domain_name': 'Domain Name:\s*(.+)', - 'domain__id': 'Domain ID:\s*(.+)', - 'registrar': 'Sponsoring Registrar:\s*(.+)', - 'registrar_id': 'Sponsoring Registrar IANA ID:\s*(.+)', - 'registrar_url': 'Registrar URL \(registration services\):\s*(.+)', - 'status': 'Domain Status:\s*(.+)', # list of statuses - 'registrant_id': 'Registrant ID:\s*(.+)', - 'registrant_name': 'Registrant Name:\s*(.+)', - 'registrant_address1': 'Registrant Address1:\s*(.+)', - 'registrant_address2': 'Registrant Address2:\s*(.+)', - 'registrant_city': 'Registrant City:\s*(.+)', - 'registrant_state_province': 'Registrant State/Province:\s*(.+)', - 'registrant_postal_code': 'Registrant Postal Code:\s*(.+)', - 'registrant_country': 'Registrant Country:\s*(.+)', - 'registrant_country_code': 'Registrant Country Code:\s*(.+)', - 'registrant_phone_number': 'Registrant Phone Number:\s*(.+)', - 'registrant_email': 'Registrant Email:\s*(.+)', - 'registrant_application_purpose': 'Registrant Application Purpose:\s*(.+)', - 'registrant_nexus_category': 'Registrant Nexus Category:\s*(.+)', - 'admin_id': 'Administrative Contact ID:\s*(.+)', - 'admin_name': 'Administrative Contact Name:\s*(.+)', - 'admin_address1': 'Administrative Contact Address1:\s*(.+)', - 'admin_address2': 'Administrative Contact Address2:\s*(.+)', - 'admin_city': 'Administrative Contact City:\s*(.+)', - 'admin_state_province': 'Administrative Contact State/Province:\s*(.+)', - 'admin_postal_code': 'Administrative Contact Postal Code:\s*(.+)', - 'admin_country': 'Administrative Contact Country:\s*(.+)', - 'admin_country_code': 'Administrative Contact Country Code:\s*(.+)', - 
'admin_phone_number': 'Administrative Contact Phone Number:\s*(.+)', - 'admin_email': 'Administrative Contact Email:\s*(.+)', - 'admin_application_purpose': 'Administrative Application Purpose:\s*(.+)', - 'admin_nexus_category': 'Administrative Nexus Category:\s*(.+)', - 'billing_id': 'Billing Contact ID:\s*(.+)', - 'billing_name': 'Billing Contact Name:\s*(.+)', - 'billing_address1': 'Billing Contact Address1:\s*(.+)', - 'billing_address2': 'Billing Contact Address2:\s*(.+)', - 'billing_city': 'Billing Contact City:\s*(.+)', - 'billing_state_province': 'Billing Contact State/Province:\s*(.+)', - 'billing_postal_code': 'Billing Contact Postal Code:\s*(.+)', - 'billing_country': 'Billing Contact Country:\s*(.+)', - 'billing_country_code': 'Billing Contact Country Code:\s*(.+)', - 'billing_phone_number': 'Billing Contact Phone Number:\s*(.+)', - 'billing_email': 'Billing Contact Email:\s*(.+)', - 'billing_application_purpose': 'Billing Application Purpose:\s*(.+)', - 'billing_nexus_category': 'Billing Nexus Category:\s*(.+)', - 'tech_id': 'Technical Contact ID:\s*(.+)', - 'tech_name': 'Technical Contact Name:\s*(.+)', - 'tech_address1': 'Technical Contact Address1:\s*(.+)', - 'tech_address2': 'Technical Contact Address2:\s*(.+)', - 'tech_city': 'Technical Contact City:\s*(.+)', - 'tech_state_province': 'Technical Contact State/Province:\s*(.+)', - 'tech_postal_code': 'Technical Contact Postal Code:\s*(.+)', - 'tech_country': 'Technical Contact Country:\s*(.+)', - 'tech_country_code': 'Technical Contact Country Code:\s*(.+)', - 'tech_phone_number': 'Technical Contact Phone Number:\s*(.+)', - 'tech_email': 'Technical Contact Email:\s*(.+)', - 'tech_application_purpose': 'Technical Application Purpose:\s*(.+)', - 'tech_nexus_category': 'Technical Nexus Category:\s*(.+)', - 'name_servers': 'Name Server:\s*(.+)', # list of name servers - 'created_by_registrar': 'Created by Registrar:\s*(.+)', - 'last_updated_by_registrar': 'Last Updated by Registrar:\s*(.+)', - 
'creation_date': 'Domain Registration Date:\s*(.+)', - 'expiration_date': 'Domain Expiration Date:\s*(.+)', - 'updated_date': 'Domain Last Updated Date:\s*(.+)', + 'domain_name': 'Domain Name: *(.+)', + 'domain__id': 'Domain ID: *(.+)', + 'registrar': 'Sponsoring Registrar: *(.+)', + 'registrar_id': 'Sponsoring Registrar IANA ID: *(.+)', + 'registrar_url': 'Registrar URL \(registration services\): *(.+)', + 'status': 'Domain Status: *(.+)', # list of statuses + 'registrant_id': 'Registrant ID: *(.+)', + 'registrant_name': 'Registrant Name: *(.+)', + 'registrant_address1': 'Registrant Address1: *(.+)', + 'registrant_address2': 'Registrant Address2: *(.+)', + 'registrant_city': 'Registrant City: *(.+)', + 'registrant_state_province': 'Registrant State/Province: *(.+)', + 'registrant_postal_code': 'Registrant Postal Code: *(.+)', + 'registrant_country': 'Registrant Country: *(.+)', + 'registrant_country_code': 'Registrant Country Code: *(.+)', + 'registrant_phone_number': 'Registrant Phone Number: *(.+)', + 'registrant_email': 'Registrant Email: *(.+)', + 'registrant_application_purpose': 'Registrant Application Purpose: *(.+)', + 'registrant_nexus_category': 'Registrant Nexus Category: *(.+)', + 'admin_id': 'Administrative Contact ID: *(.+)', + 'admin_name': 'Administrative Contact Name: *(.+)', + 'admin_address1': 'Administrative Contact Address1: *(.+)', + 'admin_address2': 'Administrative Contact Address2: *(.+)', + 'admin_city': 'Administrative Contact City: *(.+)', + 'admin_state_province': 'Administrative Contact State/Province: *(.+)', + 'admin_postal_code': 'Administrative Contact Postal Code: *(.+)', + 'admin_country': 'Administrative Contact Country: *(.+)', + 'admin_country_code': 'Administrative Contact Country Code: *(.+)', + 'admin_phone_number': 'Administrative Contact Phone Number: *(.+)', + 'admin_email': 'Administrative Contact Email: *(.+)', + 'admin_application_purpose': 'Administrative Application Purpose: *(.+)', + 'admin_nexus_category': 
'Administrative Nexus Category: *(.+)', + 'billing_id': 'Billing Contact ID: *(.+)', + 'billing_name': 'Billing Contact Name: *(.+)', + 'billing_address1': 'Billing Contact Address1: *(.+)', + 'billing_address2': 'Billing Contact Address2: *(.+)', + 'billing_city': 'Billing Contact City: *(.+)', + 'billing_state_province': 'Billing Contact State/Province: *(.+)', + 'billing_postal_code': 'Billing Contact Postal Code: *(.+)', + 'billing_country': 'Billing Contact Country: *(.+)', + 'billing_country_code': 'Billing Contact Country Code: *(.+)', + 'billing_phone_number': 'Billing Contact Phone Number: *(.+)', + 'billing_email': 'Billing Contact Email: *(.+)', + 'billing_application_purpose': 'Billing Application Purpose: *(.+)', + 'billing_nexus_category': 'Billing Nexus Category: *(.+)', + 'tech_id': 'Technical Contact ID: *(.+)', + 'tech_name': 'Technical Contact Name: *(.+)', + 'tech_address1': 'Technical Contact Address1: *(.+)', + 'tech_address2': 'Technical Contact Address2: *(.+)', + 'tech_city': 'Technical Contact City: *(.+)', + 'tech_state_province': 'Technical Contact State/Province: *(.+)', + 'tech_postal_code': 'Technical Contact Postal Code: *(.+)', + 'tech_country': 'Technical Contact Country: *(.+)', + 'tech_country_code': 'Technical Contact Country Code: *(.+)', + 'tech_phone_number': 'Technical Contact Phone Number: *(.+)', + 'tech_email': 'Technical Contact Email: *(.+)', + 'tech_application_purpose': 'Technical Application Purpose: *(.+)', + 'tech_nexus_category': 'Technical Nexus Category: *(.+)', + 'name_servers': 'Name Server: *(.+)', # list of name servers + 'created_by_registrar': 'Created by Registrar: *(.+)', + 'last_updated_by_registrar': 'Last Updated by Registrar: *(.+)', + 'creation_date': 'Domain Registration Date: *(.+)', + 'expiration_date': 'Domain Expiration Date: *(.+)', + 'updated_date': 'Domain Last Updated Date: *(.+)', } def __init__(self, domain, text): @@ -413,14 +426,14 @@ """Whois parser for .pl domains """ regex = { - 
'domain_name': 'DOMAIN NAME:\s*(.+)\n', + 'domain_name': 'DOMAIN NAME: *(.+)\n', 'registrar': 'REGISTRAR:\n\s*(.+)', - 'registrar_url': 'URL:\s*(.+)', # not available + 'registrar_url': 'URL: *(.+)', # not available 'status': 'Registration status:\n\s*(.+)', # not available 'registrant_name': 'Registrant:\n\s*(.+)', # not available - 'creation_date': 'created:\s*(.+)\n', - 'expiration_date': 'renewal date:\s*(.+)', - 'updated_date': 'last modified:\s*(.+)\n', + 'creation_date': 'created: *(.+)\n', + 'expiration_date': 'renewal date: *(.+)', + 'updated_date': 'last modified: *(.+)\n', } def __init__(self, domain, text): @@ -434,8 +447,8 @@ """Whois parser for .ca domains """ regex = { - 'registrant_name': 'Name:\s*(.+)', - 'registrant_number': 'Number:\s*(.+)\n', + 'registrant_name': 'Name: *(.+)', + 'registrant_number': 'Number: *(.+)\n', } def __init__(self, domain, text): @@ -525,13 +538,13 @@ regex = { 'domain_name': 'Domain name:\n\s*(.+)', 'registrar': 'Registrar:\n\s*(.+)', - 'registrar_url': 'URL:\s*(.+)', + 'registrar_url': 'URL: *(.+)', 'status': 'Registration status:\n\s*(.+)', # list of statuses 'registrant_name': 'Registrant:\n\s*(.+)', - 'creation_date': 'Registered on:\s*(.+)', - 'expiration_date': 'Expiry date:\s*(.+)', - 'updated_date': 'Last updated:\s*(.+)', - 'name_servers': 'Name servers:\s*(.+)', + 'creation_date': 'Registered on: *(.+)', + 'expiration_date': 'Expiry date: *(.+)', + 'updated_date': 'Last updated: *(.+)', + 'name_servers': 'Name servers: *(.+)', } def __init__(self, domain, text): @@ -545,14 +558,14 @@ """Whois parser for .fr domains """ regex = { - 'domain_name': 'domain:\s*(.+)', - 'registrar': 'registrar:\s*(.+)', - 'creation_date': 'created:\s*(.+)', - 'expiration_date': 'anniversary:\s*(.+)', - 'name_servers': 'nserver:\s*(.+)', # list of name servers - 'status': 'status:\s*(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses - 'updated_date': 'last-update:\s*(.+)', + 'domain_name': 
'domain: *(.+)', + 'registrar': 'registrar: *(.+)', + 'creation_date': 'created: *(.+)', + 'expiration_date': 'anniversary: *(.+)', + 'name_servers': 'nserver: *(.+)', # list of name servers + 'status': 'status: *(.+)', # list of statuses + 'emails': EMAIL_REGEX, # list of email addresses + 'updated_date': 'last-update: *(.+)', } def __init__(self, domain, text): @@ -566,17 +579,17 @@ """Whois parser for .fi domains """ regex = { - 'domain_name': 'domain:\s*([\S]+)', - 'name': 'descr:\s*([\S\ ]+)', - 'address': 'address:\s*([\S\ ]+)', - 'phone': 'phone:\s*([\S\ ]+)', - 'status': 'status:\s*([\S]+)', # list of statuses - 'creation_date': 'created:\s*([\S]+)', - 'updated_date': 'modified:\s*([\S]+)', - 'expiration_date': 'expires:\s*([\S]+)', - 'name_servers': 'nserver:\s*([\S]+) \[\S+\]', # list of name servers - 'name_server_statuses': 'nserver:\s*([\S]+) \[(\S+)\]', # list of name servers and statuses - 'dnssec': 'dnssec:\s*([\S]+)', + 'domain_name': 'domain: *([\S]+)', + 'name': 'descr: *([\S\ ]+)', + 'address': 'address: *([\S\ ]+)', + 'phone': 'phone: *([\S\ ]+)', + 'status': 'status: *([\S]+)', # list of statuses + 'creation_date': 'created: *([\S]+)', + 'updated_date': 'modified: *([\S]+)', + 'expiration_date': 'expires: *([\S]+)', + 'name_servers': 'nserver: *([\S]+) \[\S+\]', # list of name servers + 'name_server_statuses': 'nserver: *([\S]+) \[(\S+)\]', # list of name servers and statuses + 'dnssec': 'dnssec: *([\S]+)', } def __init__(self, domain, text): @@ -609,12 +622,12 @@ """Whois parser for .au domains """ regex = { - 'domain_name': 'Domain Name:\s*(.+)\n', - 'last_modified': 'Last Modified:\s*(.+)\n', - 'registrar': 'Registrar Name:\s*(.+)\n', - 'status': 'Status:\s*(.+)', - 'registrant_name': 'Registrant:\s*(.+)', - 'name_servers': 'Name Server:\s*(.+)', + 'domain_name': 'Domain Name: *(.+)\n', + 'last_modified': 'Last Modified: *(.+)\n', + 'registrar': 'Registrar Name: *(.+)\n', + 'status': 'Status: *(.+)', + 'registrant_name': 'Registrant: 
*(.+)', + 'name_servers': 'Name Server: *(.+)', } def __init__(self, domain, text): @@ -628,14 +641,14 @@ """Whois parser for .eu domains """ regex = { - 'domain_name': r'Domain:\s*([^\n\r]+)', - 'tech_name': r'Technical:\s*Name:\s*([^\n\r]+)', - 'tech_org': r'Technical:\s*Name:\s*[^\n\r]+\s*Organisation:\s*([^\n\r]+)', - 'tech_phone': r'Technical:\s*Name:\s*[^\n\r]+\s*Organisation:\s*[^\n\r]+\s*Language:\s*[^\n\r]+\s*Phone:\s*([^\n\r]+)', - 'tech_fax': r'Technical:\s*Name:\s*[^\n\r]+\s*Organisation:\s*[^\n\r]+\s*Language:\s*[^\n\r]+\s*Phone:\s*[^\n\r]+\s*Fax:\s*([^\n\r]+)', - 'tech_email': r'Technical:\s*Name:\s*[^\n\r]+\s*Organisation:\s*[^\n\r]+\s*Language:\s*[^\n\r]+\s*Phone:\s*[^\n\r]+\s*Fax:\s*[^\n\r]+\s*Email:\s*([^\n\r]+)', - 'registrar': r'Registrar:\s*Name:\s*([^\n\r]+)', - 'name_servers': r'Name servers:\s*([^\n\r]+)\s*([^\n\r]*)', # list of name servers + 'domain_name': r'Domain: *([^\n\r]+)', + 'tech_name': r'Technical: *Name: *([^\n\r]+)', + 'tech_org': r'Technical: *Name: *[^\n\r]+\s*Organisation: *([^\n\r]+)', + 'tech_phone': r'Technical: *Name: *[^\n\r]+\s*Organisation: *[^\n\r]+\s*Language: *[^\n\r]+\s*Phone: *([^\n\r]+)', + 'tech_fax': r'Technical: *Name: *[^\n\r]+\s*Organisation: *[^\n\r]+\s*Language: *[^\n\r]+\s*Phone: *[^\n\r]+\s*Fax: *([^\n\r]+)', + 'tech_email': r'Technical: *Name: *[^\n\r]+\s*Organisation: *[^\n\r]+\s*Language: *[^\n\r]+\s*Phone: *[^\n\r]+\s*Fax: *[^\n\r]+\s*Email: *([^\n\r]+)', + 'registrar': r'Registrar: *Name: *([^\n\r]+)', + 'name_servers': r'Name servers: *([^\n\r]+)\s*([^\n\r]*)', # list of name servers } def __init__(self, domain, text): @@ -649,25 +662,25 @@ """Whois parser for .br domains """ regex = { - 'domain': 'domain:\s*(.+)\n', - 'owner': 'owner:\s*([\S ]+)', - 'ownerid': 'ownerid:\s*(.+)', - 'country': 'country:\s*(.+)', - 'owner_c': 'owner-c:\s*(.+)', - 'admin_c': 'admin-c:\s*(.+)', - 'tech_c': 'tech-c:\s*(.+)', - 'billing_c': 'billing-c:\s*(.+)', - 'nserver': 'nserver:\s*(.+)', - 'nsstat': 
'nsstat:\s*(.+)', - 'nslastaa': 'nslastaa:\s*(.+)', - 'saci': 'saci:\s*(.+)', - 'created': 'created:\s*(.+)', - 'expires': 'expires:\s*(.+)', - 'changed': 'changed:\s*(.+)', - 'status': 'status:\s*(.+)', - 'nic_hdl_br': 'nic-hdl-br:\s*(.+)', - 'person': 'person:\s*([\S ]+)', - 'email': 'e-mail:\s*(.+)', + 'domain': 'domain: *(.+)\n', + 'owner': 'owner: *([\S ]+)', + 'ownerid': 'ownerid: *(.+)', + 'country': 'country: *(.+)', + 'owner_c': 'owner-c: *(.+)', + 'admin_c': 'admin-c: *(.+)', + 'tech_c': 'tech-c: *(.+)', + 'billing_c': 'billing-c: *(.+)', + 'nserver': 'nserver: *(.+)', + 'nsstat': 'nsstat: *(.+)', + 'nslastaa': 'nslastaa: *(.+)', + 'saci': 'saci: *(.+)', + 'created': 'created: *(.+)', + 'expires': 'expires: *(.+)', + 'changed': 'changed: *(.+)', + 'status': 'status: *(.+)', + 'nic_hdl_br': 'nic-hdl-br: *(.+)', + 'person': 'person: *([\S ]+)', + 'email': 'e-mail: *(.+)', } def __init__(self, domain, text): @@ -682,18 +695,18 @@ """Whois parser for .kr domains """ regex = { - 'domain_name': 'Domain Name\s*:\s*(.+)', - 'registrant_org': 'Registrant\s*:\s*(.+)', - 'registrant_address': 'Registrant Address\s*:\s*(.+)', - 'registrant_zip': 'Registrant Zip Code\s*:\s*(.+)', - 'admin_name': 'Administrative Contact\(AC\)\s*:\s*(.+)', - 'admin_email': 'AC E-Mail\s*:\s*(.+)', - 'admin_phone': 'AC Phone Number\s*:\s*(.+)', - 'creation_date': 'Registered Date\s*:\s*(.+)', - 'updated_date': 'Last updated Date\s*:\s*(.+)', - 'expiration_date': 'Expiration Date\s*:\s*(.+)', - 'registrar': 'Authorized Agency\s*:\s*(.+)', - 'name_servers': 'Host Name\s*:\s*(.+)', # list of name servers + 'domain_name': 'Domain Name\s*: *(.+)', + 'registrant_org': 'Registrant\s*: *(.+)', + 'registrant_address': 'Registrant Address\s*: *(.+)', + 'registrant_zip': 'Registrant Zip Code\s*: *(.+)', + 'admin_name': 'Administrative Contact\(AC\)\s*: *(.+)', + 'admin_email': 'AC E-Mail\s*: *(.+)', + 'admin_phone': 'AC Phone Number\s*: *(.+)', + 'creation_date': 'Registered Date\s*: *(.+)', + 
'updated_date': 'Last updated Date\s*: *(.+)', + 'expiration_date': 'Expiration Date\s*: *(.+)', + 'registrar': 'Authorized Agency\s*: *(.+)', + 'name_servers': 'Host Name\s*: *(.+)', # list of name servers } def __init__(self, domain, text): @@ -707,12 +720,12 @@ """Whois parser for .pt domains """ regex = { - 'domain_name': 'domain name:\s*(.+)', - 'creation_date': 'creation date \(dd\/mm\/yyyy\):\s*(.+)', - 'expiration_date': 'expiration date \(dd\/mm\/yyyy\):\s*(.+)', + 'domain_name': 'domain name: *(.+)', + 'creation_date': 'creation date \(dd\/mm\/yyyy\): *(.+)', + 'expiration_date': 'expiration date \(dd\/mm\/yyyy\): *(.+)', 'name_servers': '\tNS\t(.+).', # list of name servers - 'status': 'status:\s*(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses + 'status': 'status: *(.+)', # list of statuses + 'emails': EMAIL_REGEX, # list of email addresses } def __init__(self, domain, text): @@ -726,7 +739,7 @@ """Whois parser for .bg domains """ regex = { - 'expiration_date': 'expires at:\s*(.+)', + 'expiration_date': 'expires at: *(.+)', } dayfirst = True @@ -742,14 +755,14 @@ """Whois parser for .de domains """ regex = { - 'name': 'name:\s*(.+)', - 'org': 'Organisation:\s*(.+)', - 'address': 'Address:\s*(.+)', - 'zipcode': 'PostalCode:\s*(.+)', - 'city': 'City:\s*(.+)', - 'country_code': 'CountryCode:\s*(.+)', - 'phone': 'Phone:\s*(.+)', - 'fax': 'Fax:\s*(.+)' + 'name': 'name: *(.+)', + 'org': 'Organisation: *(.+)', + 'address': 'Address: *(.+)', + 'zipcode': 'PostalCode: *(.+)', + 'city': 'City: *(.+)', + 'country_code': 'CountryCode: *(.+)', + 'phone': 'Phone: *(.+)', + 'fax': 'Fax: *(.+)' } def __init__(self, domain, text): @@ -763,11 +776,11 @@ """Whois parser for .be domains """ regex = { - 'name': 'Name:\s*(.+)', - 'org': 'Organisation:\s*(.+)', - 'phone': 'Phone:\s*(.+)', - 'fax': 'Fax:\s*(.+)', - 'email': 'Email:\s*(.+)', + 'name': 'Name: *(.+)', + 'org': 'Organisation: *(.+)', + 'phone': 'Phone: *(.+)', + 'fax': 
'Fax: *(.+)', + 'email': 'Email: *(.+)', } def __init__(self, domain, text): @@ -782,23 +795,23 @@ """Whois parser for .info domains """ regex = { - 'domain_name': 'Domain Name:\s?(.+)', - 'registrar': 'Registrar:\s?(.+)', - 'whois_server': 'Whois Server:\s?(.+)', # empty usually - 'referral_url': 'Referral URL:\s?(.+)', # http url of whois_server: empty usually - 'updated_date': 'Updated Date:\s?(.+)', - 'creation_date': 'Creation Date:\s?(.+)', - 'expiration_date': 'Registry Expiry Date:\s?(.+)', - 'name_servers': 'Name Server:\s?(.+)', # list of name servers - 'status': 'Status:\s?(.+)', # list of statuses - 'emails': '[\w.-]+@[\w.-]+\.[\w]{2,4}', # list of email addresses - 'name': 'Registrant Name:\s*(.+)', - 'org': 'Registrant Organization:\s*(.+)', - 'address': 'Registrant Street:\s*(.+)', - 'city': 'Registrant City:\s*(.+)', - 'state': 'Registrant State/Province:\s*(.+)', - 'zipcode': 'Registrant Postal Code:\s*(.+)', - 'country': 'Registrant Country:\s*(.+)', + 'domain_name': 'Domain Name: *(.+)', + 'registrar': 'Registrar: *(.+)', + 'whois_server': 'Whois Server: *(.+)', # empty usually + 'referral_url': 'Referral URL: *(.+)', # http url of whois_server: empty usually + 'updated_date': 'Updated Date: *(.+)', + 'creation_date': 'Creation Date: *(.+)', + 'expiration_date': 'Registry Expiry Date: *(.+)', + 'name_servers': 'Name Server: *(.+)', # list of name servers + 'status': 'Status: *(.+)', # list of statuses + 'emails': EMAIL_REGEX, # list of email addresses + 'name': 'Registrant Name: *(.+)', + 'org': 'Registrant Organization: *(.+)', + 'address': 'Registrant Street: *(.+)', + 'city': 'Registrant City: *(.+)', + 'state': 'Registrant State/Province: *(.+)', + 'zipcode': 'Registrant Postal Code: *(.+)', + 'country': 'Registrant Country: *(.+)', } def __init__(self, domain, text): @@ -826,72 +839,72 @@ """Whois parser for .us domains """ regex = { - 'domain_name': 'Domain Name:\s*(.+)', - 'domain__id': 'Domain ID:\s*(.+)', - 'registrar': 'Sponsoring 
Registrar:\s*(.+)', - 'registrar_id': 'Sponsoring Registrar IANA ID:\s*(.+)', - 'registrar_url': 'Registrar URL \(registration services\):\s*(.+)', + 'domain_name': 'Domain Name: *(.+)', + 'domain__id': 'Domain ID: *(.+)', + 'registrar': 'Sponsoring Registrar: *(.+)', + 'registrar_id': 'Sponsoring Registrar IANA ID: *(.+)', + 'registrar_url': 'Registrar URL \(registration services\): *(.+)', # list of statuses - 'status': 'Domain Status:\s*(.+)', - 'registrant_id': 'Registrant ID:\s*(.+)', - 'registrant_name': 'Registrant Name:\s*(.+)', - 'registrant_address1': 'Registrant Address1:\s*(.+)', - 'registrant_address2': 'Registrant Address2:\s*(.+)', - 'registrant_city': 'Registrant City:\s*(.+)', - 'registrant_state_province': 'Registrant State/Province:\s*(.+)', - 'registrant_postal_code': 'Registrant Postal Code:\s*(.+)', - 'registrant_country': 'Registrant Country:\s*(.+)', - 'registrant_country_code': 'Registrant Country Code:\s*(.+)', - 'registrant_phone_number': 'Registrant Phone Number:\s*(.+)', - 'registrant_email': 'Registrant Email:\s*(.+)', - 'registrant_application_purpose': 'Registrant Application Purpose:\s*(.+)', - 'registrant_nexus_category': 'Registrant Nexus Category:\s*(.+)', - 'admin_id': 'Administrative Contact ID:\s*(.+)', - 'admin_name': 'Administrative Contact Name:\s*(.+)', - 'admin_address1': 'Administrative Contact Address1:\s*(.+)', - 'admin_address2': 'Administrative Contact Address2:\s*(.+)', - 'admin_city': 'Administrative Contact City:\s*(.+)', - 'admin_state_province': 'Administrative Contact State/Province:\s*(.+)', - 'admin_postal_code': 'Administrative Contact Postal Code:\s*(.+)', - 'admin_country': 'Administrative Contact Country:\s*(.+)', - 'admin_country_code': 'Administrative Contact Country Code:\s*(.+)', - 'admin_phone_number': 'Administrative Contact Phone Number:\s*(.+)', - 'admin_email': 'Administrative Contact Email:\s*(.+)', - 'admin_application_purpose': 'Administrative Application Purpose:\s*(.+)', - 
'admin_nexus_category': 'Administrative Nexus Category:\s*(.+)', - 'billing_id': 'Billing Contact ID:\s*(.+)', - 'billing_name': 'Billing Contact Name:\s*(.+)', - 'billing_address1': 'Billing Contact Address1:\s*(.+)', - 'billing_address2': 'Billing Contact Address2:\s*(.+)', - 'billing_city': 'Billing Contact City:\s*(.+)', - 'billing_state_province': 'Billing Contact State/Province:\s*(.+)', - 'billing_postal_code': 'Billing Contact Postal Code:\s*(.+)', - 'billing_country': 'Billing Contact Country:\s*(.+)', - 'billing_country_code': 'Billing Contact Country Code:\s*(.+)', - 'billing_phone_number': 'Billing Contact Phone Number:\s*(.+)', - 'billing_email': 'Billing Contact Email:\s*(.+)', - 'billing_application_purpose': 'Billing Application Purpose:\s*(.+)', - 'billing_nexus_category': 'Billing Nexus Category:\s*(.+)', - 'tech_id': 'Technical Contact ID:\s*(.+)', - 'tech_name': 'Technical Contact Name:\s*(.+)', - 'tech_address1': 'Technical Contact Address1:\s*(.+)', - 'tech_address2': 'Technical Contact Address2:\s*(.+)', - 'tech_city': 'Technical Contact City:\s*(.+)', - 'tech_state_province': 'Technical Contact State/Province:\s*(.+)', - 'tech_postal_code': 'Technical Contact Postal Code:\s*(.+)', - 'tech_country': 'Technical Contact Country:\s*(.+)', - 'tech_country_code': 'Technical Contact Country Code:\s*(.+)', - 'tech_phone_number': 'Technical Contact Phone Number:\s*(.+)', - 'tech_email': 'Technical Contact Email:\s*(.+)', - 'tech_application_purpose': 'Technical Application Purpose:\s*(.+)', - 'tech_nexus_category': 'Technical Nexus Category:\s*(.+)', + 'status': 'Domain Status: *(.+)', + 'registrant_id': 'Registrant ID: *(.+)', + 'registrant_name': 'Registrant Name: *(.+)', + 'registrant_address1': 'Registrant Address1: *(.+)', + 'registrant_address2': 'Registrant Address2: *(.+)', + 'registrant_city': 'Registrant City: *(.+)', + 'registrant_state_province': 'Registrant State/Province: *(.+)', + 'registrant_postal_code': 'Registrant Postal Code: 
*(.+)', + 'registrant_country': 'Registrant Country: *(.+)', + 'registrant_country_code': 'Registrant Country Code: *(.+)', + 'registrant_phone_number': 'Registrant Phone Number: *(.+)', + 'registrant_email': 'Registrant Email: *(.+)', + 'registrant_application_purpose': 'Registrant Application Purpose: *(.+)', + 'registrant_nexus_category': 'Registrant Nexus Category: *(.+)', + 'admin_id': 'Administrative Contact ID: *(.+)', + 'admin_name': 'Administrative Contact Name: *(.+)', + 'admin_address1': 'Administrative Contact Address1: *(.+)', + 'admin_address2': 'Administrative Contact Address2: *(.+)', + 'admin_city': 'Administrative Contact City: *(.+)', + 'admin_state_province': 'Administrative Contact State/Province: *(.+)', + 'admin_postal_code': 'Administrative Contact Postal Code: *(.+)', + 'admin_country': 'Administrative Contact Country: *(.+)', + 'admin_country_code': 'Administrative Contact Country Code: *(.+)', + 'admin_phone_number': 'Administrative Contact Phone Number: *(.+)', + 'admin_email': 'Administrative Contact Email: *(.+)', + 'admin_application_purpose': 'Administrative Application Purpose: *(.+)', + 'admin_nexus_category': 'Administrative Nexus Category: *(.+)', + 'billing_id': 'Billing Contact ID: *(.+)', + 'billing_name': 'Billing Contact Name: *(.+)', + 'billing_address1': 'Billing Contact Address1: *(.+)', + 'billing_address2': 'Billing Contact Address2: *(.+)', + 'billing_city': 'Billing Contact City: *(.+)', + 'billing_state_province': 'Billing Contact State/Province: *(.+)', + 'billing_postal_code': 'Billing Contact Postal Code: *(.+)', + 'billing_country': 'Billing Contact Country: *(.+)', + 'billing_country_code': 'Billing Contact Country Code: *(.+)', + 'billing_phone_number': 'Billing Contact Phone Number: *(.+)', + 'billing_email': 'Billing Contact Email: *(.+)', + 'billing_application_purpose': 'Billing Application Purpose: *(.+)', + 'billing_nexus_category': 'Billing Nexus Category: *(.+)', + 'tech_id': 'Technical Contact ID: 
*(.+)', + 'tech_name': 'Technical Contact Name: *(.+)', + 'tech_address1': 'Technical Contact Address1: *(.+)', + 'tech_address2': 'Technical Contact Address2: *(.+)', + 'tech_city': 'Technical Contact City: *(.+)', + 'tech_state_province': 'Technical Contact State/Province: *(.+)', + 'tech_postal_code': 'Technical Contact Postal Code: *(.+)', + 'tech_country': 'Technical Contact Country: *(.+)', + 'tech_country_code': 'Technical Contact Country Code: *(.+)', + 'tech_phone_number': 'Technical Contact Phone Number: *(.+)', + 'tech_email': 'Technical Contact Email: *(.+)', + 'tech_application_purpose': 'Technical Application Purpose: *(.+)', + 'tech_nexus_category': 'Technical Nexus Category: *(.+)', # list of name servers - 'name_servers': 'Name Server:\s*(.+)', - 'created_by_registrar': 'Created by Registrar:\s*(.+)', - 'last_updated_by_registrar': 'Last Updated by Registrar:\s*(.+)', - 'creation_date': 'Domain Registration Date:\s*(.+)', - 'expiration_date': 'Domain Expiration Date:\s*(.+)', - 'updated_date': 'Domain Last Updated Date:\s*(.+)', + 'name_servers': 'Name Server: *(.+)', + 'created_by_registrar': 'Created by Registrar: *(.+)', + 'last_updated_by_registrar': 'Last Updated by Registrar: *(.+)', + 'creation_date': 'Domain Registration Date: *(.+)', + 'expiration_date': 'Domain Expiration Date: *(.+)', + 'updated_date': 'Domain Last Updated Date: *(.+)', } def __init__(self, domain, text): @@ -905,11 +918,11 @@ """Whois parser for .io domains """ regex = { - 'status': 'Status\s*:\s*(.+)', - 'name_servers': 'NS \d?\s*:\s*(.+)', - 'owner': 'Owner\s*:\s*(.+)', - 'expiration_date': 'Expiry\s*:\s*(.+)', - 'domain_name': 'Domain\s*:\s*(.+)', + 'status': 'Status\s*: *(.+)', + 'name_servers': 'NS \d?\s*: *(.+)', + 'owner': 'Owner\s*: *(.+)', + 'expiration_date': 'Expiry\s*: *(.+)', + 'domain_name': 'Domain\s*: *(.+)', 'registrar': r'Check for \'[\w\.]*\' --- (.+)', } @@ -940,14 +953,14 @@ regex = { 'domain_name': 'Domain\s*([\w]+\.[\w]{2,5})', 'registrar': 
'Domain support: \s*(.+)', - 'registrant_name': 'Name:\s*(.+)', - 'registrant_address1': 'Address:\s*(.+)', - 'registrant_phone_number': 'phone:\s*(.+)', - 'registrant_email': 'Email:\s*(.+)', + 'registrant_name': 'Name: *(.+)', + 'registrant_address1': 'Address: *(.+)', + 'registrant_phone_number': 'phone: *(.+)', + 'registrant_email': 'Email: *(.+)', # # list of name servers - 'name_servers': 'Name servers in the listed order:\s*([\d\w\.\s]+)', + 'name_servers': 'Name servers in the listed order: *([\d\w\.\s]+)', # 'name_servers': r'([\w]+\.[\w]+\.[\w]{2,5}\s*\d{1,3}\.\d]{1,3}\.[\d]{1-3}\.[\d]{1-3})', - 'creation_date': 'Record created:\s*(.+)', + 'creation_date': 'Record created: *(.+)', 'expiration_date': 'Record expires on \s*(.+)', 'updated_date': 'Record last updated on\s*(.+)', diff -r fa9650e9ec23 -r 359baebcf0e8 whois/time_zones.py --- a/whois/time_zones.py Wed Mar 16 15:08:15 2016 +0100 +++ b/whois/time_zones.py Wed Mar 16 15:18:36 2016 +0100 @@ -1,3 +1,10 @@ +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * _tz_string = '''-12 Y -11 X NUT SST -10 W CKT HAST HST TAHT TKT diff -r fa9650e9ec23 -r 359baebcf0e8 whois/whois.py --- a/whois/whois.py Wed Mar 16 15:08:15 2016 +0100 +++ b/whois/whois.py Wed Mar 16 15:18:36 2016 +0100 @@ -24,13 +24,20 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
""" +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * +from builtins import object import re import sys import socket import optparse - class NICClient(object): ABUSEHOST = "whois.abuse.net" @@ -90,7 +97,7 @@ s.connect((hostname, 43)) # end takes bytes as an input queryBytes = None - if type(query) is not unicode: + if type(query) is not str: query = query.decode('utf-8') if hostname == NICClient.DENICHOST: @@ -109,12 +116,12 @@ break s.close() except socket.error as socketerror: - print 'Socket Error:', socketerror + print('Socket Error:', socketerror) return '' else: nhost = None response = response.decode('utf-8') - if 'with "=xxx"' in response: + if b'with "=xxx"' in response: return self.whois(query, hostname, flags, True) if flags & NICClient.WHOIS_RECURSE and nhost is None: nhost = self.findwhois_server(response, hostname, query) @@ -122,10 +129,11 @@ response += self.whois(query, nhost, 0) return response + def choose_server(self, domain): """Choose initial lookup NIC host""" - if type(domain) is not unicode: - domain = domain.decode('utf-8').encode('idna') + if type(domain) is not str: + domain = domain.decode('utf-8').encode('idna').decode('utf-8') if domain.endswith("-NORID"): return NICClient.NORIDHOST pos = domain.rfind('.') @@ -238,4 +246,4 @@ options, args = parse_command_line(sys.argv) if options.b_quicklookup: flags = flags | NICClient.WHOIS_QUICK - print nic_client.whois_lookup(options.__dict__, args[1], flags) + print(nic_client.whois_lookup(options.__dict__, args[1], flags))