whois/__init__.py
changeset 98 3202436d89d0
parent 86 d6fcfa5acc7b
parent 97 44522cd37b07
child 122 95feee1af1da
equal deleted inserted replaced
93:acdc2cb09f60 98:3202436d89d0
    55     if re.match(r'\d+\.\d+\.\d+\.\d+', url):
    55     if re.match(r'\d+\.\d+\.\d+\.\d+', url):
    56         # this is an IP address
    56         # this is an IP address
    57         return socket.gethostbyaddr(url)[0]
    57         return socket.gethostbyaddr(url)[0]
    58 
    58 
    59     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    59     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    60     suffixes = [
    60     with open(tlds_path) as tlds_fil:
    61         line.lower().strip().encode('utf-8')
    61         suffixes = [line.lower().encode('utf-8')
    62         for line in open(tlds_path).readlines()
    62                     for line in (x.strip() for x in tlds_fil)
    63         if not line.startswith('#')
    63                     if not line.startswith('#')]
    64     ]
    64     suff = 'xn--p1ai'
    65 
    65 
    66     if not isinstance(url, str):
    66     if not isinstance(url, str):
    67         url = url.decode('utf-8')
    67         url = url.decode('utf-8')
    68     url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower()
    68     url = re.sub('^.*://', '', url)
       
    69     url = url.split('/')[0].lower().encode('idna')
       
    70 
    69     domain = []
    71     domain = []
    70 
       
    71     for section in url.split(b'.'):
    72     for section in url.split(b'.'):
    72         if section in suffixes:
    73         if section in suffixes:
    73             domain.append(section)
    74             domain.append(section)
    74         else:
    75         else:
    75             domain = [section]
    76             domain = [section]