whois/__init__.py
changeset 81 359baebcf0e8
parent 73 644d81a7995b
child 86 d6fcfa5acc7b
child 97 44522cd37b07
equal deleted inserted replaced
80:fa9650e9ec23 81:359baebcf0e8
       
     1 from __future__ import print_function
       
     2 from __future__ import absolute_import
       
     3 from __future__ import unicode_literals
       
     4 from __future__ import division
       
     5 from future import standard_library
       
     6 standard_library.install_aliases()
       
     7 from builtins import *
     1 import re
     8 import re
     2 import sys
     9 import sys
     3 import os
    10 import os
     4 import subprocess
    11 import subprocess
     5 import socket
    12 import socket
     6 from parser import WhoisEntry
    13 from .parser import WhoisEntry
     7 from whois import NICClient
    14 from .whois import NICClient
     8 
    15 
     9 
    16 
    10 def whois(url, command=False):
    17 def whois(url, command=False):
    11     # clean domain to expose netloc
    18     # clean domain to expose netloc
    12     ip_match = re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", url)
    19     ip_match = re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", url)
    43         # this is an IP address
    50         # this is an IP address
    44         return socket.gethostbyaddr(url)[0]
    51         return socket.gethostbyaddr(url)[0]
    45 
    52 
    46     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    53     tlds_path = os.path.join(os.getcwd(), os.path.dirname(__file__), 'data', 'tlds.txt')
    47     suffixes = [
    54     suffixes = [
    48         line.lower().strip()
    55         line.lower().strip().encode('utf-8')
    49         for line in open(tlds_path).readlines()
    56         for line in open(tlds_path).readlines()
    50         if not line.startswith('#')
    57         if not line.startswith('#')
    51     ]
    58     ]
    52 
    59 
    53     if type(url) is not unicode:
    60     if not isinstance(url, str):
    54         url = url.decode('utf-8')
    61         url = url.decode('utf-8')
    55     url = re.sub('^.*://', '', url.encode('idna')).split('/')[0].lower()
    62     url = re.sub(b'^.*://', b'', url.encode('idna')).split(b'/')[0].lower()
    56     domain = []
    63     domain = []
    57 
    64 
    58     for section in url.split('.'):
    65     for section in url.split(b'.'):
    59         if section in suffixes:
    66         if section in suffixes:
    60             domain.append(section)
    67             domain.append(section)
    61         else:
    68         else:
    62             domain = [section]
    69             domain = [section]
    63     return '.'.join(domain).decode('idna').encode('utf-8')
    70     return b'.'.join(domain).decode('idna')
    64 
    71 
    65 
    72 
    66 if __name__ == '__main__':
    73 if __name__ == '__main__':  # script entry point: body runs only when the module is executed directly
    67     try:
    74     try:
    68         url = sys.argv[1]
    75         url = sys.argv[1]  # first command-line argument is taken as the URL to look up
    69     except IndexError:
    76     except IndexError:  # sys.argv[1] is missing, i.e. no argument was given
    70         print 'Usage: %s url' % sys.argv[0]  # print-statement form; presumably the pre-change side of the diff
    77         print('Usage: %s url' % sys.argv[0])  # print() call form; usage message names the script via sys.argv[0]
    71     else:
    78     else:  # reached only when an argument was supplied
    72         print whois(url)
    79         print(whois(url))  # print whatever whois() returns for that URL