|
1 # parser.py - Module for parsing whois response data |
|
2 # Copyright (c) 2008 Andrey Petrov |
|
3 # |
|
4 # This module is part of pywhois and is released under |
|
5 # the MIT license: http://www.opensource.org/licenses/mit-license.php |
|
6 |
|
7 import re |
|
8 from datetime import datetime |
|
9 |
|
10 |
|
class PywhoisError(Exception):
    """Error raised by this module when a whois response cannot be used.

    The parser classes raise it with the raw response text as the message.
    """
|
13 |
|
14 |
|
def cast_date(s):
    """Convert a date string found in WHOIS output to a datetime object.

    Each known format is tried in order and the first one that parses wins.

    Args:
        s: Raw string value extracted from a WHOIS response.

    Returns:
        A ``datetime`` when ``s`` (ignoring surrounding whitespace) matches
        one of the known formats; otherwise ``s`` itself, unchanged.
    """
    known_formats = (
        '%d-%b-%Y',                 # 02-jan-2000
        '%Y-%m-%d',                 # 2000-01-02
        '%d.%m.%Y',                 # 02.01.2000
        '%Y.%m.%d',                 # 2000.01.02
        '%Y/%m/%d',                 # 2000/01/02
        '%d-%b-%Y %H:%M:%S %Z',     # 24-Jul-2009 13:20:03 UTC
        '%a %b %d %H:%M:%S %Z %Y',  # Tue Jun 21 23:59:59 GMT 2011
        '%Y-%m-%dT%H:%M:%SZ',       # 2007-01-26T19:10:31Z
    )

    # Strip once up front instead of on every parsing attempt.
    candidate = s.strip()
    for known_format in known_formats:
        try:
            return datetime.strptime(candidate, known_format)
        except ValueError:
            continue  # Wrong format, keep trying
    # Not a recognised date: hand the original value back untouched.
    return s
|
35 |
|
36 |
|
class _UnknownWhoisAttribute(AttributeError, KeyError):
    """Raised when an attribute name has no extraction pattern.

    Derives from ``AttributeError`` so that ``hasattr()`` and ``getattr()``
    with a default report the attribute as missing instead of propagating an
    error, and from ``KeyError`` so that callers which already catch
    ``KeyError`` keep working.
    """


class WhoisEntry(object):
    """Base class for parsing whois entries.

    Attributes named in ``_regex`` are extracted lazily from ``text`` the
    first time they are read and are then stored on the instance.
    """
    # regular expressions to extract domain data from whois profile
    # child classes will override this
    _regex = {
        'domain_name':      r'Domain Name:\s?(.+)',
        'registrar':        r'Registrar:\s?(.+)',
        'whois_server':     r'Whois Server:\s?(.+)',
        'referral_url':     r'Referral URL:\s?(.+)',  # http url of whois_server
        'updated_date':     r'Updated Date:\s?(.+)',
        'creation_date':    r'Creation Date:\s?(.+)',
        'expiration_date':  r'Expiration Date:\s?(.+)',
        'name_servers':     r'Name Server:\s?(.+)',  # list of name servers
        'status':           r'Status:\s?(.+)',  # list of statuses
        'emails':           r'[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    }

    def __init__(self, domain, text, regex=None):
        """Store the queried domain and the raw whois response.

        Args:
            domain: Domain name the response belongs to.
            text: Raw whois response text.
            regex: Optional mapping of attribute name to pattern; when given
                it replaces the class-level ``_regex`` for this instance.
        """
        self.domain = domain
        self.text = text
        if regex is not None:
            self._regex = regex

    def __getattr__(self, attr):
        """Extract ``attr`` from the whois text on first access.

        The result is stored on the instance, so later reads are served by
        normal attribute lookup and never reach this method again.

        Returns:
            The single matched value when there is exactly one match,
            otherwise the list of matches (empty when nothing matched).
            Values that look like dates are converted by ``cast_date``.

        Raises:
            KeyError: (also an ``AttributeError``) if ``attr`` has no pattern.
        """
        whois_regex = self._regex.get(attr)
        if not whois_regex:
            raise _UnknownWhoisAttribute('Unknown attribute: %s' % attr)

        values = re.findall(whois_regex, self.text, re.IGNORECASE)
        # try casting to date format
        values = [cast_date(value.strip()) for value in values]
        if len(values) == 1:
            values = values[0]
        setattr(self, attr, values)
        return values

    def __str__(self):
        """Return all whois properties of the domain, one per line."""
        return '\n'.join('%s: %s' % (attr, str(getattr(self, attr))) for attr in self.attrs())

    def attrs(self):
        """Return the sorted list of attributes that can be extracted."""
        return sorted(self._regex.keys())

    @staticmethod
    def load(domain, text):
        """Return the parser instance matching the suffix of ``domain``.

        Args:
            domain: Domain name that was queried.
            text: Raw whois output for that domain.

        Returns:
            An instance of the ``WhoisEntry`` subclass chosen by the domain
            suffix, or a plain ``WhoisEntry`` for any other suffix.

        Raises:
            PywhoisError: if the response says no whois server is known, or
                if the chosen parser rejects the response.
        """
        if text.strip() == 'No whois server is known for this kind of object.':
            raise PywhoisError(text)

        # Parser classes are resolved lazily, branch by branch, because they
        # are defined after this class in the module.
        if domain.endswith('.com'):
            return WhoisCom(domain, text)
        elif domain.endswith('.net'):
            return WhoisNet(domain, text)
        elif domain.endswith('.org'):
            return WhoisOrg(domain, text)
        elif domain.endswith('.name'):
            return WhoisName(domain, text)
        elif domain.endswith('.me'):
            return WhoisMe(domain, text)
        elif domain.endswith('.ru'):
            return WhoisRu(domain, text)
        elif domain.endswith('.us'):
            return WhoisUs(domain, text)
        elif domain.endswith('.uk'):
            return WhoisUk(domain, text)
        elif domain.endswith('.fr'):
            return WhoisFr(domain, text)
        elif domain.endswith('.fi'):
            return WhoisFi(domain, text)
        elif domain.endswith('.jp'):
            return WhoisJp(domain, text)
        elif domain.endswith('.pl'):
            return WhoisPl(domain, text)
        else:
            return WhoisEntry(domain, text)
|
123 |
|
124 |
|
125 |
|
class WhoisCom(WhoisEntry):
    """Parser for whois responses of .com domains.

    Relies on the default patterns inherited from ``WhoisEntry``.
    """

    def __init__(self, domain, text):
        # A response containing this phrase is treated as an error.
        rejected = 'No match for "' in text
        if rejected:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text)
|
134 |
|
135 |
|
class WhoisNet(WhoisEntry):
    """Parser for whois responses of .net domains.

    Relies on the default patterns inherited from ``WhoisEntry``.
    """

    def __init__(self, domain, text):
        # A response containing this phrase is treated as an error.
        rejected = 'No match for "' in text
        if rejected:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text)
|
144 |
|
145 |
|
class WhoisOrg(WhoisEntry):
    """Parser for whois responses of .org domains.

    Relies on the default patterns inherited from ``WhoisEntry``.
    """

    def __init__(self, domain, text):
        # A response that is nothing but this marker is treated as an error.
        stripped = text.strip()
        if stripped == 'NOT FOUND':
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text)
|
154 |
|
155 |
|
class WhoisRu(WhoisEntry):
    """Whois parser for .ru domains

    Raises ``PywhoisError`` from the constructor when the response is just
    ``No entries found``.
    """
    # Raw strings keep the regex escapes (\s, \w, \.) away from Python's own
    # string-escape processing.
    regex = {
        'domain_name': r'domain:\s*(.+)',
        'registrar': r'registrar:\s*(.+)',
        'creation_date': r'created:\s*(.+)',
        'expiration_date': r'paid-till:\s*(.+)',
        'name_servers': r'nserver:\s*(.+)',  # list of name servers
        'status': r'state:\s*(.+)',  # list of statuses
        'emails': r'[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    }

    def __init__(self, domain, text):
        if text.strip() == 'No entries found':
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
174 |
|
175 |
|
class WhoisName(WhoisEntry):
    """Whois parser for .name domains

    Raises ``PywhoisError`` from the constructor when the response contains
    ``No match.``.
    """
    # Raw strings keep the regex escape \s away from Python's own
    # string-escape processing.
    regex = {
        'domain_name_id': r'Domain Name ID:\s*(.+)',
        'domain_name': r'Domain Name:\s*(.+)',
        'registrar_id': r'Sponsoring Registrar ID:\s*(.+)',
        'registrar': r'Sponsoring Registrar:\s*(.+)',
        'registrant_id': r'Registrant ID:\s*(.+)',
        'admin_id': r'Admin ID:\s*(.+)',
        'technical_id': r'Tech ID:\s*(.+)',
        'billing_id': r'Billing ID:\s*(.+)',
        'creation_date': r'Created On:\s*(.+)',
        'expiration_date': r'Expires On:\s*(.+)',
        'updated_date': r'Updated On:\s*(.+)',
        'name_server_ids': r'Name Server ID:\s*(.+)',  # list of name server ids
        'name_servers': r'Name Server:\s*(.+)',  # list of name servers
        'status': r'Domain Status:\s*(.+)',  # list of statuses
    }

    def __init__(self, domain, text):
        if 'No match.' in text:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
200 |
|
201 |
|
class WhoisUs(WhoisEntry):
    """Whois parser for .us domains

    Raises ``PywhoisError`` from the constructor when the response contains
    ``Not found:``.
    """
    # Raw strings keep the regex escapes (\s, \(, \)) away from Python's own
    # string-escape processing.
    regex = {
        'domain_name': r'Domain Name:\s*(.+)',
        # NOTE: the double underscore looks like a typo, but the key is the
        # public attribute name, so it is kept for backward compatibility.
        'domain__id': r'Domain ID:\s*(.+)',
        'registrar': r'Sponsoring Registrar:\s*(.+)',
        'registrar_id': r'Sponsoring Registrar IANA ID:\s*(.+)',
        'registrar_url': r'Registrar URL \(registration services\):\s*(.+)',
        'status': r'Domain Status:\s*(.+)',  # list of statuses
        'registrant_id': r'Registrant ID:\s*(.+)',
        'registrant_name': r'Registrant Name:\s*(.+)',
        'registrant_address1': r'Registrant Address1:\s*(.+)',
        'registrant_address2': r'Registrant Address2:\s*(.+)',
        'registrant_city': r'Registrant City:\s*(.+)',
        'registrant_state_province': r'Registrant State/Province:\s*(.+)',
        'registrant_postal_code': r'Registrant Postal Code:\s*(.+)',
        'registrant_country': r'Registrant Country:\s*(.+)',
        'registrant_country_code': r'Registrant Country Code:\s*(.+)',
        'registrant_phone_number': r'Registrant Phone Number:\s*(.+)',
        'registrant_email': r'Registrant Email:\s*(.+)',
        'registrant_application_purpose': r'Registrant Application Purpose:\s*(.+)',
        'registrant_nexus_category': r'Registrant Nexus Category:\s*(.+)',
        'admin_id': r'Administrative Contact ID:\s*(.+)',
        'admin_name': r'Administrative Contact Name:\s*(.+)',
        'admin_address1': r'Administrative Contact Address1:\s*(.+)',
        'admin_address2': r'Administrative Contact Address2:\s*(.+)',
        'admin_city': r'Administrative Contact City:\s*(.+)',
        'admin_state_province': r'Administrative Contact State/Province:\s*(.+)',
        'admin_postal_code': r'Administrative Contact Postal Code:\s*(.+)',
        'admin_country': r'Administrative Contact Country:\s*(.+)',
        'admin_country_code': r'Administrative Contact Country Code:\s*(.+)',
        'admin_phone_number': r'Administrative Contact Phone Number:\s*(.+)',
        'admin_email': r'Administrative Contact Email:\s*(.+)',
        'admin_application_purpose': r'Administrative Application Purpose:\s*(.+)',
        'admin_nexus_category': r'Administrative Nexus Category:\s*(.+)',
        'billing_id': r'Billing Contact ID:\s*(.+)',
        'billing_name': r'Billing Contact Name:\s*(.+)',
        'billing_address1': r'Billing Contact Address1:\s*(.+)',
        'billing_address2': r'Billing Contact Address2:\s*(.+)',
        'billing_city': r'Billing Contact City:\s*(.+)',
        'billing_state_province': r'Billing Contact State/Province:\s*(.+)',
        'billing_postal_code': r'Billing Contact Postal Code:\s*(.+)',
        'billing_country': r'Billing Contact Country:\s*(.+)',
        'billing_country_code': r'Billing Contact Country Code:\s*(.+)',
        'billing_phone_number': r'Billing Contact Phone Number:\s*(.+)',
        'billing_email': r'Billing Contact Email:\s*(.+)',
        'billing_application_purpose': r'Billing Application Purpose:\s*(.+)',
        'billing_nexus_category': r'Billing Nexus Category:\s*(.+)',
        'tech_id': r'Technical Contact ID:\s*(.+)',
        'tech_name': r'Technical Contact Name:\s*(.+)',
        'tech_address1': r'Technical Contact Address1:\s*(.+)',
        'tech_address2': r'Technical Contact Address2:\s*(.+)',
        'tech_city': r'Technical Contact City:\s*(.+)',
        'tech_state_province': r'Technical Contact State/Province:\s*(.+)',
        'tech_postal_code': r'Technical Contact Postal Code:\s*(.+)',
        'tech_country': r'Technical Contact Country:\s*(.+)',
        'tech_country_code': r'Technical Contact Country Code:\s*(.+)',
        'tech_phone_number': r'Technical Contact Phone Number:\s*(.+)',
        'tech_email': r'Technical Contact Email:\s*(.+)',
        'tech_application_purpose': r'Technical Application Purpose:\s*(.+)',
        'tech_nexus_category': r'Technical Nexus Category:\s*(.+)',
        'name_servers': r'Name Server:\s*(.+)',  # list of name servers
        'created_by_registrar': r'Created by Registrar:\s*(.+)',
        'last_updated_by_registrar': r'Last Updated by Registrar:\s*(.+)',
        'creation_date': r'Domain Registration Date:\s*(.+)',
        'expiration_date': r'Domain Expiration Date:\s*(.+)',
        'updated_date': r'Domain Last Updated Date:\s*(.+)',
    }

    def __init__(self, domain, text):
        if 'Not found:' in text:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
276 |
|
277 |
|
class WhoisPl(WhoisEntry):
    """Whois parser for .pl domains

    Raises ``PywhoisError`` from the constructor when the response contains
    ``Not found:``.
    """
    # Raw strings keep the regex escapes (\s, \n) away from Python's own
    # string-escape processing; the regex engine reads \n as a newline, so
    # the patterns match exactly what they matched before.
    regex = {
        'domain_name': r'DOMAIN NAME:\s*(.+)\n',
        'registrar': r'REGISTRAR:\n\s*(.+)',
        'registrar_url': r'URL:\s*(.+)',  # not available
        'status': r'Registration status:\n\s*(.+)',  # not available
        'registrant_name': r'Registrant:\n\s*(.+)',  # not available
        'creation_date': r'created:\s*(.+)\n',
        'expiration_date': r'renewal date:\s*(.+)',
        'updated_date': r'last modified:\s*(.+)\n',
    }

    def __init__(self, domain, text):
        # NOTE(review): this marker is identical to the one used by the .uk
        # parser - confirm it is what the .pl registry actually returns.
        if 'Not found:' in text:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
296 |
|
297 |
|
class WhoisMe(WhoisEntry):
    """Whois parser for .me domains

    Raises ``PywhoisError`` from the constructor when the response contains
    ``NOT FOUND``.
    """
    # Raw strings keep the regex escape \. away from Python's own
    # string-escape processing.
    regex = {
        'domain_id': r'Domain ID:(.+)',
        'domain_name': r'Domain Name:(.+)',
        'creation_date': r'Domain Create Date:(.+)',
        'updated_date': r'Domain Last Updated Date:(.+)',
        'expiration_date': r'Domain Expiration Date:(.+)',
        'transfer_date': r'Last Transferred Date:(.+)',
        'trademark_name': r'Trademark Name:(.+)',
        'trademark_country': r'Trademark Country:(.+)',
        'trademark_number': r'Trademark Number:(.+)',
        'trademark_application_date': r'Date Trademark Applied For:(.+)',
        'trademark_registration_date': r'Date Trademark Registered:(.+)',
        'registrar': r'Sponsoring Registrar:(.+)',
        'created_by': r'Created by:(.+)',
        'updated_by': r'Last Updated by Registrar:(.+)',
        'status': r'Domain Status:(.+)',  # list of statuses
        'registrant_id': r'Registrant ID:(.+)',
        'registrant_name': r'Registrant Name:(.+)',
        'registrant_org': r'Registrant Organization:(.+)',
        'registrant_address': r'Registrant Address:(.+)',
        'registrant_address2': r'Registrant Address2:(.+)',
        'registrant_address3': r'Registrant Address3:(.+)',
        'registrant_city': r'Registrant City:(.+)',
        'registrant_state_province': r'Registrant State/Province:(.+)',
        'registrant_country': r'Registrant Country/Economy:(.+)',
        'registrant_postal_code': r'Registrant Postal Code:(.+)',
        'registrant_phone': r'Registrant Phone:(.+)',
        'registrant_phone_ext': r'Registrant Phone Ext\.:(.+)',
        'registrant_fax': r'Registrant FAX:(.+)',
        'registrant_fax_ext': r'Registrant FAX Ext\.:(.+)',
        'registrant_email': r'Registrant E-mail:(.+)',
        'admin_id': r'Admin ID:(.+)',
        'admin_name': r'Admin Name:(.+)',
        'admin_org': r'Admin Organization:(.+)',
        'admin_address': r'Admin Address:(.+)',
        'admin_address2': r'Admin Address2:(.+)',
        'admin_address3': r'Admin Address3:(.+)',
        'admin_city': r'Admin City:(.+)',
        'admin_state_province': r'Admin State/Province:(.+)',
        'admin_country': r'Admin Country/Economy:(.+)',
        'admin_postal_code': r'Admin Postal Code:(.+)',
        'admin_phone': r'Admin Phone:(.+)',
        'admin_phone_ext': r'Admin Phone Ext\.:(.+)',
        'admin_fax': r'Admin FAX:(.+)',
        'admin_fax_ext': r'Admin FAX Ext\.:(.+)',
        'admin_email': r'Admin E-mail:(.+)',
        'tech_id': r'Tech ID:(.+)',
        'tech_name': r'Tech Name:(.+)',
        'tech_org': r'Tech Organization:(.+)',
        'tech_address': r'Tech Address:(.+)',
        'tech_address2': r'Tech Address2:(.+)',
        'tech_address3': r'Tech Address3:(.+)',
        'tech_city': r'Tech City:(.+)',
        'tech_state_province': r'Tech State/Province:(.+)',
        'tech_country': r'Tech Country/Economy:(.+)',
        'tech_postal_code': r'Tech Postal Code:(.+)',
        'tech_phone': r'Tech Phone:(.+)',
        'tech_phone_ext': r'Tech Phone Ext\.:(.+)',
        'tech_fax': r'Tech FAX:(.+)',
        'tech_fax_ext': r'Tech FAX Ext\.:(.+)',
        'tech_email': r'Tech E-mail:(.+)',
        'name_servers': r'Nameservers:(.+)',  # list of name servers
    }

    def __init__(self, domain, text):
        if 'NOT FOUND' in text:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
369 |
|
370 |
|
class WhoisUk(WhoisEntry):
    """Whois parser for .uk domains

    Raises ``PywhoisError`` from the constructor when the response contains
    ``Not found:``.
    """
    # Raw strings keep the regex escapes (\s, \n) away from Python's own
    # string-escape processing; the regex engine reads \n as a newline, so
    # the patterns match exactly what they matched before.
    regex = {
        'domain_name': r'Domain name:\n\s*(.+)',
        'registrar': r'Registrar:\n\s*(.+)',
        'registrar_url': r'URL:\s*(.+)',
        'status': r'Registration status:\n\s*(.+)',  # list of statuses
        'registrant_name': r'Registrant:\n\s*(.+)',
        'creation_date': r'Registered on:\s*(.+)',
        'expiration_date': r'Expiry date:\s*(.+)',
        'updated_date': r'Last updated:\s*(.+)',
        'name_servers': r'Name servers:\s*(.+)',
    }

    def __init__(self, domain, text):
        if 'Not found:' in text:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
390 |
|
391 |
|
class WhoisFr(WhoisEntry):
    """Whois parser for .fr domains

    Raises ``PywhoisError`` from the constructor when the response is just
    ``No entries found``.
    """
    # Raw strings keep the regex escapes (\s, \w, \.) away from Python's own
    # string-escape processing.
    regex = {
        'domain_name': r'domain:\s*(.+)',
        'registrar': r'registrar:\s*(.+)',
        'creation_date': r'created:\s*(.+)',
        'expiration_date': r'anniversary:\s*(.+)',
        'name_servers': r'nserver:\s*(.+)',  # list of name servers
        'status': r'status:\s*(.+)',  # list of statuses
        'emails': r'[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
        'updated_date': r'last-update:\s*(.+)',
    }

    def __init__(self, domain, text):
        if text.strip() == 'No entries found':
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
411 |
|
412 |
|
class WhoisFi(WhoisEntry):
    """Whois parser for .fi domains

    Raises ``PywhoisError`` from the constructor when the response contains
    ``Not found:``.
    """
    # Raw strings keep the regex escapes (\s, \S, \[) away from Python's own
    # string-escape processing.
    regex = {
        'domain_name': r'domain:\s*([\S]+)',
        'registrant_name': r'descr:\s*([\S\ ]+)',
        'registrant_address': r'address:\s*([\S\ ]+)',
        'registrant_phone': r'phone:\s*([\S\ ]+)',
        'status': r'status:\s*([\S]+)',  # list of statuses
        'creation_date': r'created:\s*([\S]+)',
        'updated_date': r'modified:\s*([\S]+)',
        'expiration_date': r'expires:\s*([\S]+)',
        # Only the server name is captured. The bracketed token after it is
        # still required to match but is not a group: with two groups
        # re.findall returns tuples, which the base class cannot .strip().
        'name_servers': r'nserver:\s*([\S]+) \[\S+\]',  # list of name servers
        'dnssec': r'dnssec:\s*([\S]+)',  # dnssec value
    }

    def __init__(self, domain, text):
        if 'Not found:' in text:
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)
|
433 |
|
434 |
|
class WhoisJp(WhoisEntry):
    """Whois parser for .jp domains

    Raises ``PywhoisError`` from the constructor when the response is just
    ``No entries found``.
    """
    # Raw strings keep the regex escapes (\., \[, \], \s) away from Python's
    # own string-escape processing.
    regex = {
        'domain_name': r'a\. \[Domain Name\]\s*(.+)',
        'registrant_org': r'g\. \[Organization\](.+)',
        'creation_date': r'\[Registered Date\]\s*(.+)',
        'name_servers': r'p\. \[Name Server\]\s*(.+)',  # list of name servers
        'updated_date': r'\[Last Update\]\s?(.+)',
        'status': r'\[State\]\s*(.+)',  # list of statuses
    }

    def __init__(self, domain, text):
        if text.strip() == 'No entries found':
            raise PywhoisError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)