Better date parsing support if python-dateutil is available
author Evgeni Kunev <evgeni.kunev@gmail.com>
Tue, 19 Aug 2014 15:24:51 +0300
changeset 39 68375a768598
parent 38 da8f2956db7e
child 40 9573d5bc9ad5
Better date parsing support if python-dateutil is available. The `strptime` and `strftime` functions can't deal with timezone names not known to the current system when there's a %Z in the format string.
setup.py
whois/parser.py
whois/time_zones.py
--- a/setup.py	Mon Aug 18 13:10:53 2014 +0300
+++ b/setup.py	Tue Aug 19 15:24:51 2014 +0300
@@ -26,6 +26,9 @@
     package_data={
         'whois': ['data/*.txt']
     },
+    extras_require={
+        'better date conversion': ["python-dateutil"]
+    },
     include_package_data=True,
     zip_safe=False,
 )
--- a/whois/parser.py	Mon Aug 18 13:10:53 2014 +0300
+++ b/whois/parser.py	Tue Aug 19 15:24:51 2014 +0300
@@ -7,43 +7,60 @@
 # the MIT license: http://www.opensource.org/licenses/mit-license.php
 
 import re
-from datetime import datetime
-   
+try:
+    import dateutil.parser as dp
+    from whois.time_zones import tz_data
+    DATEUTIL = True
+except ImportError:
+    from datetime import datetime
+    DATEUTIL = False
+
+KNOWN_FORMATS = [
+    '%d-%b-%Y', 				# 02-jan-2000
+    '%Y-%m-%d', 				# 2000-01-02
+    '%d.%m.%Y', 				# 2.1.2000
+    '%Y.%m.%d',                 # 2000.01.02
+    '%Y/%m/%d',                 # 2000/01/02
+    '%d/%m/%Y',                 # 02/01/2013
+    '%Y. %m. %d.',              # 2000. 01. 02.
+    '%Y.%m.%d %H:%M:%S',        # 2014.03.08 10:28:24
+    '%d-%b-%Y %H:%M:%S %Z',		# 24-Jul-2009 13:20:03 UTC
+    '%a %b %d %H:%M:%S %Z %Y',  # Tue Jun 21 23:59:59 GMT 2011
+    '%Y-%m-%dT%H:%M:%SZ',       # 2007-01-26T19:10:31Z
+    '%Y-%m-%dT%H:%M:%S%z',      # 2013-12-06T08:17:22-0800
+    '%Y-%m-%d %H:%M:%SZ',       # 2000-08-22 18:55:20Z
+    '%Y-%m-%d %H:%M:%S',        # 2000-08-22 18:55:20
+    '%d %b %Y %H:%M:%S',        # 08 Apr 2013 05:44:00
+    '%d/%m/%Y %H:%M:%S',     # 23/04/2015 12:00:07
+    '%d/%m/%Y %H:%M:%S %Z',     # 23/04/2015 12:00:07 EEST
+    '%d/%m/%Y %H:%M:%S.%f %Z',  # 23/04/2015 12:00:07.619546 EEST
+]
+
 
 class PywhoisError(Exception):
     pass
 
 
+def datetime_parse(s):
+    for known_format in KNOWN_FORMATS:
+        try:
+            s = datetime.strptime(s.strip(), known_format)
+            break
+        except ValueError as e:
+            pass  # Wrong format, keep trying
+    return s
+
+
 def cast_date(s):
     """Convert any date string found in WHOIS to a datetime object.
     """
-    known_formats = [
-        '%d-%b-%Y', 				# 02-jan-2000
-        '%Y-%m-%d', 				# 2000-01-02
-        '%d.%m.%Y', 				# 2.1.2000
-        '%Y.%m.%d',                 # 2000.01.02
-        '%Y/%m/%d',                 # 2000/01/02
-        '%d/%m/%Y',                 # 02/01/2013
-        '%Y. %m. %d.',              # 2000. 01. 02.
-        '%Y.%m.%d %H:%M:%S',        # 2014.03.08 10:28:24
-        '%d-%b-%Y %H:%M:%S %Z',		# 24-Jul-2009 13:20:03 UTC
-        '%a %b %d %H:%M:%S %Z %Y',  # Tue Jun 21 23:59:59 GMT 2011
-        '%Y-%m-%dT%H:%M:%SZ',       # 2007-01-26T19:10:31Z
-        '%Y-%m-%dT%H:%M:%S%z',      # 2013-12-06T08:17:22-0800
-        '%Y-%m-%d %H:%M:%SZ',       # 2000-08-22 18:55:20Z
-        '%Y-%m-%d %H:%M:%S',        # 2000-08-22 18:55:20
-        '%d %b %Y %H:%M:%S',        # 08 Apr 2013 05:44:00
-        '%d/%m/%Y %H:%M:%S %Z',     # 23/04/2015 12:00:07 EEST
-        '%d/%m/%Y %H:%M:%S.%f %Z',  # 23/04/2015 12:00:07.619546 EEST
-    ]
-
-    for known_format in known_formats:
+    if DATEUTIL:
         try:
-            s = datetime.strptime(s.strip(), known_format)
-            break
-        except ValueError as e:
-            pass # Wrong format, keep trying
-    return s
+            return dp.parse(s.strip(), tzinfos=tz_data)
+        except Exception:
+            return datetime_parse(s)
+    else:
+        return datetime_parse(s)
 
 
 class WhoisEntry(object):
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/whois/time_zones.py	Tue Aug 19 15:24:51 2014 +0300
@@ -0,0 +1,45 @@
+_tz_string = '''-12 Y
+-11 X NUT SST
+-10 W CKT HAST HST TAHT TKT
+-9 V AKST GAMT GIT HADT HNY
+-8 U AKDT CIST HAY HNP PST PT
+-7 T HAP HNR MST PDT
+-6 S CST EAST GALT HAR HNC MDT
+-5 R CDT COT EASST ECT EST ET HAC HNE PET
+-4 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT
+-3 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT
+-2 O BRST FNT PMDT UYST WGST
+-1 N AZOT CVT EGT
+0 Z EGST GMT UTC WET WT
+1 A CET DFT WAT WEDT WEST
+2 B CAT CEDT CEST EET SAST WAST
+3 C EAT EEDT EEST IDT MSK
+4 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT
+5 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT
+6 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST
+7 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB
+8 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST
+9 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT
+10 K AEST ChST PGT VLAT YAKST YAPT
+11 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT
+12 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT
+13 FJST NZDT
+11.5 NFT
+10.5 ACDT LHST
+9.5 ACST
+6.5 CCT MMT
+5.75 NPT
+5.5 SLT
+4.5 AFT IRDT
+3.5 IRST
+-2.5 HAT NDT
+-3.5 HNT NST NT
+-4.5 HLV VET
+-9.5 MART MIT'''
+
+tz_data = {}
+
+for tz_descr in (tz_spec.split() for tz_spec in _tz_string.split('\n')):
+    tz_offset = int(float(tz_descr[0]) * 3600)
+    for tz_code in tz_descr[1:]:
+        tz_data[tz_code] = tz_offset