whois/__init__.py
changeset 136 30259bf0523f
parent 124 dfa8657bdefc
child 140 196df98347d8
equal deleted inserted replaced
135:808c8bc803f5 136:30259bf0523f
    71     # load known TLD suffixes
    71     # load known TLD suffixes
    72     global suffixes
    72     global suffixes
    73     if not suffixes:
    73     if not suffixes:
    74         # downloaded from https://publicsuffix.org/list/public_suffix_list.dat
    74         # downloaded from https://publicsuffix.org/list/public_suffix_list.dat
    75         tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'public_suffix_list.dat')
    75         tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'public_suffix_list.dat')
    76         with open(tlds_path) as tlds_fp:
    76         with open(tlds_path, encoding='utf-8') as tlds_fp:
    77             suffixes = set(line.encode('utf-8') for line in tlds_fp.read().splitlines() if line and not line.startswith('//'))
    77             suffixes = set(line.encode('utf-8') for line in tlds_fp.read().splitlines() if line and not line.startswith('//'))
    78 
    78 
    79     if not isinstance(url, str):
    79     if not isinstance(url, str):
    80         url = url.decode('utf-8')
    80         url = url.decode('utf-8')
    81     url = re.sub('^.*://', '', url)
    81     url = re.sub('^.*://', '', url)
    83 
    83 
    84     # find the longest suffix match
    84     # find the longest suffix match
    85     domain = b''
    85     domain = b''
    86     for section in reversed(url.split(b'.')):
    86     for section in reversed(url.split(b'.')):
    87         if domain:
    87         if domain:
    88             domain = '.' + domain
    88             domain = b'.' + domain
    89         domain = section + domain
    89         domain = section + domain
    90         if domain not in suffixes:
    90         if domain not in suffixes:
    91             break
    91             break
    92     return domain.decode('idna')
    92     return domain.decode('idna')
    93 
    93