# HG changeset patch # User Evgeni Kunev # Date 1408451091 -10800 # Node ID 68375a7685989fbbd435c2fe1134060b0734a93a # Parent da8f2956db7e3a310092d96d598fb87131a0913f Better date parsing support if python-dateutil is available The `strptime` and `strftime` functions can't deal with timezone names not known to the current system when there's a %Z in the format string. diff -r da8f2956db7e -r 68375a768598 setup.py --- a/setup.py Mon Aug 18 13:10:53 2014 +0300 +++ b/setup.py Tue Aug 19 15:24:51 2014 +0300 @@ -26,6 +26,9 @@ package_data={ 'whois': ['data/*.txt'] }, + extras_require={ + 'better date conversion': ["python-dateutil"] + }, include_package_data=True, zip_safe=False, ) diff -r da8f2956db7e -r 68375a768598 whois/parser.py --- a/whois/parser.py Mon Aug 18 13:10:53 2014 +0300 +++ b/whois/parser.py Tue Aug 19 15:24:51 2014 +0300 @@ -7,43 +7,60 @@ # the MIT license: http://www.opensource.org/licenses/mit-license.php import re -from datetime import datetime - +try: + import dateutil.parser as dp + from whois.time_zones import tz_data + DATEUTIL = True +except ImportError: + from datetime import datetime + DATEUTIL = False + +KNOWN_FORMATS = [ + '%d-%b-%Y', # 02-jan-2000 + '%Y-%m-%d', # 2000-01-02 + '%d.%m.%Y', # 2.1.2000 + '%Y.%m.%d', # 2000.01.02 + '%Y/%m/%d', # 2000/01/02 + '%d/%m/%Y', # 02/01/2013 + '%Y. %m. %d.', # 2000. 01. 02. 
+ '%Y.%m.%d %H:%M:%S', # 2014.03.08 10:28:24 + '%d-%b-%Y %H:%M:%S %Z', # 24-Jul-2009 13:20:03 UTC + '%a %b %d %H:%M:%S %Z %Y', # Tue Jun 21 23:59:59 GMT 2011 + '%Y-%m-%dT%H:%M:%SZ', # 2007-01-26T19:10:31Z + '%Y-%m-%dT%H:%M:%S%z', # 2013-12-06T08:17:22-0800 + '%Y-%m-%d %H:%M:%SZ', # 2000-08-22 18:55:20Z + '%Y-%m-%d %H:%M:%S', # 2000-08-22 18:55:20 + '%d %b %Y %H:%M:%S', # 08 Apr 2013 05:44:00 + '%d/%m/%Y %H:%M:%S', # 23/04/2015 12:00:07 + '%d/%m/%Y %H:%M:%S %Z', # 23/04/2015 12:00:07 EEST + '%d/%m/%Y %H:%M:%S.%f %Z', # 23/04/2015 12:00:07.619546 EEST +] + class PywhoisError(Exception): pass +def datetime_parse(s): + for known_format in KNOWN_FORMATS: + try: + s = datetime.strptime(s.strip(), known_format) + break + except ValueError as e: + pass # Wrong format, keep trying + return s + + def cast_date(s): """Convert any date string found in WHOIS to a datetime object. """ - known_formats = [ - '%d-%b-%Y', # 02-jan-2000 - '%Y-%m-%d', # 2000-01-02 - '%d.%m.%Y', # 2.1.2000 - '%Y.%m.%d', # 2000.01.02 - '%Y/%m/%d', # 2000/01/02 - '%d/%m/%Y', # 02/01/2013 - '%Y. %m. %d.', # 2000. 01. 02. 
- '%Y.%m.%d %H:%M:%S', # 2014.03.08 10:28:24 - '%d-%b-%Y %H:%M:%S %Z', # 24-Jul-2009 13:20:03 UTC - '%a %b %d %H:%M:%S %Z %Y', # Tue Jun 21 23:59:59 GMT 2011 - '%Y-%m-%dT%H:%M:%SZ', # 2007-01-26T19:10:31Z - '%Y-%m-%dT%H:%M:%S%z', # 2013-12-06T08:17:22-0800 - '%Y-%m-%d %H:%M:%SZ', # 2000-08-22 18:55:20Z - '%Y-%m-%d %H:%M:%S', # 2000-08-22 18:55:20 - '%d %b %Y %H:%M:%S', # 08 Apr 2013 05:44:00 - '%d/%m/%Y %H:%M:%S %Z', # 23/04/2015 12:00:07 EEST - '%d/%m/%Y %H:%M:%S.%f %Z', # 23/04/2015 12:00:07.619546 EEST - ] - - for known_format in known_formats: + if DATEUTIL: try: - s = datetime.strptime(s.strip(), known_format) - break - except ValueError as e: - pass # Wrong format, keep trying - return s + return dp.parse(s.strip(), tzinfos=tz_data) + except Exception: + return datetime_parse(s) + else: + return datetime_parse(s) class WhoisEntry(object): diff -r da8f2956db7e -r 68375a768598 whois/time_zones.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/whois/time_zones.py Tue Aug 19 15:24:51 2014 +0300 @@ -0,0 +1,45 @@ +_tz_string = '''-12 Y +-11 X NUT SST +-10 W CKT HAST HST TAHT TKT +-9 V AKST GAMT GIT HADT HNY +-8 U AKDT CIST HAY HNP PST PT +-7 T HAP HNR MST PDT +-6 S CST EAST GALT HAR HNC MDT +-5 R CDT COT EASST ECT EST ET HAC HNE PET +-4 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT +-3 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT +-2 O BRST FNT PMDT UYST WGST +-1 N AZOT CVT EGT +0 Z EGST GMT UTC WET WT +1 A CET DFT WAT WEDT WEST +2 B CAT CEDT CEST EET SAST WAST +3 C EAT EEDT EEST IDT MSK +4 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT +5 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT +6 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST +7 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB +8 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST +9 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT +10 K AEST ChST PGT VLAT YAKST YAPT +11 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT +12 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT +13 FJST NZDT +11.5 NFT 
+10.5 ACDT LHST +9.5 ACST +6.5 CCT MMT +5.75 NPT +5.5 SLT +4.5 AFT IRDT +3.5 IRST +-2.5 HAT NDT +-3.5 HNT NST NT +-4.5 HLV VET +-9.5 MART MIT''' + +tz_data = {} + +for tz_descr in (tz_spec.split() for tz_spec in _tz_string.split('\n')): + tz_offset = int(float(tz_descr[0]) * 3600) + for tz_code in tz_descr[1:]: + tz_data[tz_code] = tz_offset