test/test_parser.py
author Richard Baron Penman
Wed, 19 Oct 2011 17:09:00 +0900
changeset 0 ea0e45971cea
child 12 c57439b500cb
permissions -rw-r--r--
initial commit to mercurial
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     1
import unittest
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     2
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     3
import os
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     4
import sys
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     5
sys.path.append('../')
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     6
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     7
import time
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     8
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
     9
import simplejson
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
    10
from glob import glob
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
    11
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
    12
from pywhois.parser import WhoisEntry, cast_date
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
    13
ea0e45971cea initial commit to mercurial
Richard Baron Penman
parents:
diff changeset
    14
class TestParser(unittest.TestCase):
    """Tests for WhoisEntry parsing and the cast_date helper."""

    # NOTE: Set this to True to write expected results from the parse results.
    # This will overwrite the existing expected results. Only do this if you've
    # manually confirmed that the parser is generating correct values at its
    # current state.
    regenerate_expected = False

    def test_com_expiration(self):
        """The 'Expiration Date' line of a .com record is returned by get('expiration_date')."""
        data = """
            Status: ok
            Updated Date: 14-apr-2008
            Creation Date: 14-apr-2008
            Expiration Date: 14-apr-2009
            
            >>> Last update of whois database: Sun, 31 Aug 2008 00:18:23 UTC <<<
        """
        w = WhoisEntry.load('urlowl.com', data)
        expires = w.get('expiration_date')
        # The raw (un-cast) date string is expected, wrapped in a list.
        self.assertEqual(expires, ['14-apr-2009'])

    def test_cast_date(self):
        """cast_date handles both the 'dd-mon-yyyy' and the 'yyyy-mm-dd' spelling of a date."""
        dates = ['14-apr-2008', '2008-04-14']
        for d in dates:
            # cast_date's return value is handed directly to time.strftime.
            r = time.strftime('%Y-%m-%d', cast_date(d))
            self.assertEqual(r, '2008-04-14')

    def test_com_allsamples(self):
        """
        Iterate over all of the test/samples/whois/*.com files, read the data,
        parse it, and compare to the expected values in test/samples/expected/.
        Only keys defined in keys_to_test will be tested.

        To generate fresh expected value dumps, see the NOTE on
        ``regenerate_expected``.
        """
        keys_to_test = ['domain_name', 'expiration_date', 'updated_date', 'creation_date', 'status']
        fail = 0
        # Both sample paths are relative to the current working directory; when
        # the glob matches nothing the loop body never runs and the test passes.
        for path in glob('test/samples/whois/*.com'):
            # Parse whois data (the sample's file name doubles as the domain name)
            domain = os.path.basename(path)
            with open(path) as whois_fp:
                data = whois_fp.read()

            w = WhoisEntry.load(domain, data)
            results = dict((key, w.get(key)) for key in keys_to_test)

            expected_path = os.path.join('test/samples/expected/', domain)

            # Regeneration happens before the expected file is read, so a dump
            # can be created for a sample that has no expected file yet.
            if self.regenerate_expected:
                with open(expected_path, 'w') as expected_fp:
                    simplejson.dump(results, expected_fp)
                continue

            # Load expected result
            with open(expected_path) as expected_fp:
                expected_results = simplejson.load(expected_fp)

            # Compare each key; count every mismatch instead of stopping at the
            # first one so that a single run reports all of them.
            for key in keys_to_test:
                result = results.get(key)
                expected = expected_results.get(key)
                if expected != result:
                    print("%s \t(%s):\t %s != %s" % (domain, key, result, expected))
                    fail += 1

        if fail:
            self.fail("%d sample whois attributes were not parsed properly!" % fail)