equal
deleted
inserted
replaced
77 suffixes = set(line.encode('utf-8') for line in tlds_fp.read().splitlines() if line and not line.startswith('//')) |
77 suffixes = set(line.encode('utf-8') for line in tlds_fp.read().splitlines() if line and not line.startswith('//')) |
78 |
78 |
79 if not isinstance(url, str): |
79 if not isinstance(url, str): |
80 url = url.decode('utf-8') |
80 url = url.decode('utf-8') |
81 url = re.sub('^.*://', '', url) |
81 url = re.sub('^.*://', '', url) |
82 url = url.split('/')[0].lower().encode('idna') |
82 url = url.split('/')[0].lower() |
83 |
83 |
84 # find the longest suffix match |
84 # find the longest suffix match |
85 domain = b'' |
85 domain = b'' |
86 for section in reversed(url.split(b'.')): |
86 for section in reversed(url.split('.')): |
87 if domain: |
87 if domain: |
88 domain = b'.' + domain |
88 domain = b'.' + domain |
89 domain = section + domain |
89 domain = section.encode('utf-8') + domain |
90 if domain not in suffixes: |
90 if domain not in suffixes: |
91 break |
91 break |
92 return domain.decode('idna') |
92 return domain.decode('utf-8') |
93 |
93 |
94 |
94 |
95 if __name__ == '__main__': |
95 if __name__ == '__main__': |
96 try: |
96 try: |
97 url = sys.argv[1] |
97 url = sys.argv[1] |