| author | Grigouze <grigouze@yahoo.fr> |
| Thu, 30 Nov 2017 11:00:43 +0100 | |
| branch | whois_in |
| changeset 138 | f8c4b2f6355b |
| parent 131 | 193a62737030 |
| child 143 | 36a89acade53 |
| permissions | -rw-r--r-- |
| 55 | 1 |
Goal |
2 |
==== |
|
3 |
||
4 |
- Create a simple importable Python module which will produce parsed |
|
5 |
WHOIS data for a given domain. |
|
6 |
- Able to extract data for all the popular TLDs (com, org, net, ...) |
|
7 |
- Query a WHOIS server directly instead of going through an |
|
8 |
intermediate web service like many others do. |
|
|
79
95f170b4fd13
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
78
diff
changeset
|
9 |
- Works with Python 2 & 3 |
| 55 | 10 |
|
|
61
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
11 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
12 |
|
| 55 | 13 |
Example |
14 |
======= |
|
15 |
||
16 |
.. sourcecode:: python |
|
17 |
||
18 |
>>> import whois |
|
19 |
>>> w = whois.whois('webscraping.com')
|
|
20 |
>>> w.expiration_date # dates converted to datetime object |
|
21 |
datetime.datetime(2013, 6, 26, 0, 0) |
|
22 |
>>> w.text # the content downloaded from whois server |
|
23 |
u'\nWhois Server Version 2.0\n\nDomain names in the .com and .net |
|
24 |
...' |
|
25 |
||
26 |
>>> print w # print values of all found attributes |
|
27 |
creation_date: 2004-06-26 00:00:00 |
|
28 |
domain_name: [u'WEBSCRAPING.COM', u'WEBSCRAPING.COM'] |
|
29 |
emails: [u'WEBSCRAPING.COM@domainsbyproxy.com', u'WEBSCRAPING.COM@domainsbyproxy.com'] |
|
30 |
expiration_date: 2013-06-26 00:00:00 |
|
31 |
... |
|
32 |
||
|
61
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
33 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
34 |
|
| 55 | 35 |
Install |
36 |
======= |
|
37 |
||
38 |
Install from pypi: |
|
39 |
||
|
79
95f170b4fd13
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
78
diff
changeset
|
40 |
.. sourcecode:: bash |
| 55 | 41 |
|
42 |
pip install python-whois |
|
43 |
||
44 |
Or check out the latest version from the repository: |
|
45 |
||
|
79
95f170b4fd13
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
78
diff
changeset
|
46 |
.. sourcecode:: bash |
| 55 | 47 |
|
48 |
hg clone https://bitbucket.org/richardpenman/pywhois |
|
49 |
||
|
79
95f170b4fd13
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
78
diff
changeset
|
50 |
Note that you will then need to manually install the futures module, which allows supporting both Python 2 & 3: |
|
78
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
51 |
|
|
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
52 |
|
|
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
53 |
.. sourcecode:: bash |
|
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
54 |
|
|
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
55 |
pip install futures |
|
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
56 |
|
|
9f5fac50355a
README.rst edited online with Bitbucket
Richard Penman <richard.penman@gmail.com>
parents:
77
diff
changeset
|
57 |
|
|
61
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
58 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
59 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
60 |
Changelog |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
61 |
========= |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
62 |
|
| 100 | 63 |
0.6 - 2016-03-02: |
| 77 | 64 |
|
65 |
* support added for python 3 |
|
66 |
* updated TLD list |
|
67 |
||
|
61
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
68 |
0.5 - 2015-09-05: |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
69 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
70 |
* added native client, which now handles whois requests by default |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
71 |
* added pretty formatting to string representation |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
72 |
* return None instead of raising KeyError when an attribute does not exist |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
73 |
* new TLDs: .mobi, .io, .kg, .su, .biz |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
74 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
75 |
0.4 - 2015-08-13: |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
76 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
77 |
* new TLDs: .de, .nl, .ca, .be |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
78 |
* migrated to bitbucket |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
79 |
* added socket timeout |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
80 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
81 |
0.3 - 2015-03-31: |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
82 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
83 |
* improved datetime parsing with python-dateutil when available |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
84 |
* base WhoisEntry class inherits from dict |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
85 |
* fixed TLDs: .org, .info |
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
86 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
87 |
|
|
c391985a797b
README.rst edited online with Bitbucket
Richard Penman <richardbp@gmail.com>
parents:
55
diff
changeset
|
88 |
|
|
131
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
89 |
Problems? |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
90 |
========= |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
91 |
|
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
92 |
Pull requests are welcome! |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
93 |
|
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
94 |
Thanks to the many who have sent patches for additional TLDs. If you want to add or fix a TLD, it's quite straightforward. |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
95 |
See example domains in `whois/parser.py <https://bitbucket.org/richardpenman/pywhois/src/tip/whois/parser.py?at=default&fileviewer=file-view-default>`_ |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
96 |
|
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
97 |
Basically each TLD has a similar format to the following: |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
98 |
|
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
99 |
.. sourcecode:: python |
| 55 | 100 |
|
|
131
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
101 |
class WhoisOrg(WhoisEntry): |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
102 |
"""Whois parser for .org domains |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
103 |
""" |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
104 |
regex = {
|
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
105 |
'domain_name': 'Domain Name: *(.+)', |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
106 |
'registrar': 'Registrar: *(.+)', |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
107 |
'whois_server': 'Whois Server: *(.+)', |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
108 |
... |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
109 |
} |
| 55 | 110 |
|
|
131
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
111 |
def __init__(self, domain, text): |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
112 |
if text.strip() == 'NOT FOUND': |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
113 |
raise PywhoisError(text) |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
114 |
else: |
|
193a62737030
README.rst edited online with Bitbucket
Richard <richardbp@gmail.com>
parents:
100
diff
changeset
|
115 |
WhoisEntry.__init__(self, domain, text) |