whois/__init__.py
changeset 38 da8f2956db7e
parent 35 0de2468a27e8
child 60 7801a420f679
equal deleted inserted replaced
37:8ad334b5363b 38:da8f2956db7e
    54         line.lower().strip()
    54         line.lower().strip()
    55         for line in open(tlds_path).readlines()
    55         for line in open(tlds_path).readlines()
    56         if not line.startswith('#')
    56         if not line.startswith('#')
    57     ]
    57     ]
    58 
    58 
    59     url = re.sub('^.*://', '', url).split('/')[0].lower()
    59     if type(url) is not unicode:
       
    60         url = url.decode('utf-8')
       
    61     url = re.sub('^.*://', '', url.encode('idna')).split('/')[0].lower()
    60     domain = []
    62     domain = []
    61     url_sections = (
       
    62         section.decode('utf-8').encode('idna')
       
    63         for section in url.split('.')
       
    64     )
       
    65 
    63 
    66     for section in url_sections:
    64     for section in url.split('.'):
    67         if section in suffixes:
    65         if section in suffixes:
    68             domain.append(section)
    66             domain.append(section)
    69         else:
    67         else:
    70             domain = [section]
    68             domain = [section]
    71     return '.'.join(domain).decode('idna').encode('utf-8')
    69     return '.'.join(domain).decode('idna').encode('utf-8')