equal
deleted
inserted
replaced
54 line.lower().strip() |
54 line.lower().strip() |
55 for line in open(tlds_path).readlines() |
55 for line in open(tlds_path).readlines() |
56 if not line.startswith('#') |
56 if not line.startswith('#') |
57 ] |
57 ] |
58 |
58 |
59 url = re.sub('^.*://', '', url).split('/')[0].lower() |
59 if type(url) is not unicode: |
|
60 url = url.decode('utf-8') |
|
61 url = re.sub('^.*://', '', url.encode('idna')).split('/')[0].lower() |
60 domain = [] |
62 domain = [] |
61 url_sections = ( |
|
62 section.decode('utf-8').encode('idna') |
|
63 for section in url.split('.') |
|
64 ) |
|
65 |
63 |
66 for section in url_sections: |
64 for section in url.split('.'): |
67 if section in suffixes: |
65 if section in suffixes: |
68 domain.append(section) |
66 domain.append(section) |
69 else: |
67 else: |
70 domain = [section] |
68 domain = [section] |
71 return '.'.join(domain).decode('idna').encode('utf-8') |
69 return '.'.join(domain).decode('idna').encode('utf-8') |