5 import socket |
5 import socket |
6 from parser import WhoisEntry |
6 from parser import WhoisEntry |
7 from whois import NICClient |
7 from whois import NICClient |
8 |
8 |
9 |
9 |
def whois(url, command=False):
    """Look up WHOIS data for a URL, host name or dotted-quad IPv4 address.

    A string that looks like a dotted-quad IPv4 address is queried as-is;
    anything else is first reduced to a domain with ``extract_domain``.

    :param url: URL, host name or IPv4 address to look up.
    :param command: when true, run the system ``whois`` executable and use
        its standard output; when false (the default), query through
        ``NICClient``.
    :returns: the result of ``WhoisEntry.load(domain, text)``.
    :raises OSError: if ``command`` is true and the ``whois`` executable
        cannot be started.
    """
    # clean domain to expose netloc
    ip_match = re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", url)
    if ip_match:
        domain = url
    else:
        domain = extract_domain(url)
    if command:
        # try native whois command
        r = subprocess.Popen(['whois', domain], stdout=subprocess.PIPE)
        # communicate() drains stdout, closes the pipe and waits for the
        # child, so no unreaped process or open descriptor is left behind.
        text = r.communicate()[0]
    else:
        # try builtin client
        nic_client = NICClient()
        text = nic_client.whois_lookup(None, domain, 0)
    return WhoisEntry.load(domain, text)
30 |
26 |
31 |
27 |
32 def extract_domain(url): |
28 def extract_domain(url): |
33 """Extract the domain from the given URL |
29 """Extract the domain from the given URL |
34 |
30 |
35 >>> extract_domain('http://www.google.com.au/tos.html') |
31 >>> extract_domain('http://www.google.com.au/tos.html') |
36 'google.com.au' |
32 'google.com.au' |
37 >>> extract_domain('http://blog.webscraping.com') |
33 >>> extract_domain('www.webscraping.com') |
38 'webscraping.com' |
34 'webscraping.com' |
39 >>> extract_domain('www.bbc.co.uk') |
|
40 'bbc.co.uk' |
|
41 >>> extract_domain('198.252.206.140') |
35 >>> extract_domain('198.252.206.140') |
42 'stackoverflow.com' |
36 'stackoverflow.com' |
43 >>> extract_domain('102.112.2O7.net') |
37 >>> extract_domain('102.112.2O7.net') |
44 '2o7.net' |
38 '2o7.net' |
45 >>> extract_domain('1-0-1-1-1-0-1-1-1-1-1-1-1-.0-0-0-0-0-0-0-0-0-0-0-0-0-10-0-0-0-0-0-0-0-0-0-0-0-0-0.info') |
39 >>> extract_domain('1-0-1-1-1-0-1-1-1-1-1-1-1-.0-0-0-0-0-0-0-0-0-0-0-0-0-10-0-0-0-0-0-0-0-0-0-0-0-0-0.info') |