whois/__init__.py
changeset 97 44522cd37b07
parent 73 644d81a7995b
child 98 3202436d89d0
equal deleted inserted replaced
96:4177eba88c22 97:44522cd37b07
    49     if re.match(r'\d+\.\d+\.\d+\.\d+', url):
    49     if re.match(r'\d+\.\d+\.\d+\.\d+', url):
    50         # this is an IP address
    50         # this is an IP address
    51         return socket.gethostbyaddr(url)[0]
    51         return socket.gethostbyaddr(url)[0]
    52 
    52 
    53     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    53     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    54     suffixes = [
    54     with open(tlds_path) as tlds_fil:
    55         line.lower().strip().encode('utf-8')
    55         suffixes = [line.lower().encode('utf-8')
    56         for line in open(tlds_path).readlines()
    56                     for line in (x.strip() for x in tlds_fil)
    57         if not line.startswith('#')
    57                     if not line.startswith('#')]
    58     ]
    58     suff = 'xn--p1ai'
    59 
    59 
    60     if not isinstance(url, str):
    60     if not isinstance(url, str):
    61         url = url.decode('utf-8')
    61         url = url.decode('utf-8')
    62     url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower()
    62     url = re.sub('^.*://', '', url)
       
    63     url = url.split('/')[0].lower().encode('idna')
       
    64 
    63     domain = []
    65     domain = []
    64 
       
    65     for section in url.split(b'.'):
    66     for section in url.split(b'.'):
    66         if section in suffixes:
    67         if section in suffixes:
    67             domain.append(section)
    68             domain.append(section)
    68         else:
    69         else:
    69             domain = [section]
    70             domain = [section]