whois/__init__.py
changeset 140 196df98347d8
parent 136 30259bf0523f
child 158 bcae8cb61002
equal deleted inserted replaced
136:30259bf0523f 140:196df98347d8
    77             suffixes = set(line.encode('utf-8') for line in tlds_fp.read().splitlines() if line and not line.startswith('//'))
    77             suffixes = set(line.encode('utf-8') for line in tlds_fp.read().splitlines() if line and not line.startswith('//'))
    78 
    78 
    79     if not isinstance(url, str):
    79     if not isinstance(url, str):
    80         url = url.decode('utf-8')
    80         url = url.decode('utf-8')
    81     url = re.sub('^.*://', '', url)
    81     url = re.sub('^.*://', '', url)
    82     url = url.split('/')[0].lower().encode('idna')
    82     url = url.split('/')[0].lower()
    83 
    83 
    84     # find the longest suffix match
    84     # find the longest suffix match
    85     domain = b''
    85     domain = b''
    86     for section in reversed(url.split(b'.')):
    86     for section in reversed(url.split('.')):
    87         if domain:
    87         if domain:
    88             domain = b'.' + domain
    88             domain = b'.' + domain
    89         domain = section + domain
    89         domain = section.encode('utf-8') + domain
    90         if domain not in suffixes:
    90         if domain not in suffixes:
    91             break
    91             break
    92     return domain.decode('idna')
    92     return domain.decode('utf-8')
    93 
    93 
    94 
    94 
    95 if __name__ == '__main__':
    95 if __name__ == '__main__':
    96     try:
    96     try:
    97         url = sys.argv[1]
    97         url = sys.argv[1]