whois/__init__.py
branch python3
changeset 71 b181f795cc0d
parent 70 1fe2c20adeba
child 73 644d81a7995b
legend: equal | deleted | inserted | replaced
70:1fe2c20adeba 71:b181f795cc0d
    50         # this is an IP address
    50         # this is an IP address
    51         return socket.gethostbyaddr(url)[0]
    51         return socket.gethostbyaddr(url)[0]
    52 
    52 
    53     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    53     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    54     suffixes = [
    54     suffixes = [
    55         line.lower().strip()
    55         line.lower().strip().encode('utf-8')
    56         for line in open(tlds_path).readlines()
    56         for line in open(tlds_path).readlines()
    57         if not line.startswith('#')
    57         if not line.startswith('#')
    58     ]
    58     ]
    59 
    59 
    60     if type(url) is not str:
    60     if not isinstance(url, str):
    61         url = url.decode('utf-8')
    61         url = url.decode('utf-8')
    62     url = re.sub('^.*://', '', url.encode('idna')).split('/')[0].lower()
    62     url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower()
    63     domain = []
    63     domain = []
       
    64     print('url:', url)
    64 
    65 
    65     for section in url.split('.'):
    66     for section in url.split(b'.'):
    66         if section in suffixes:
    67         if section in suffixes:
    67             domain.append(section)
    68             domain.append(section)
    68         else:
    69         else:
    69             domain = [section]
    70             domain = [section]
    70     return '.'.join(domain).decode('idna').encode('utf-8')
    71     return b'.'.join(domain).decode('idna')
    71 
    72 
    72 
    73 
    73 if __name__ == '__main__':
    74 if __name__ == '__main__':
    74     try:
    75     try:
    75         url = sys.argv[1]
    76         url = sys.argv[1]