#!/usr/bin/env python
import sys
import urllib
import re
import Queue
import threading
import time
import socket
# Captures the text between the "Global" globe icon and the closing </div>
# in the fetched page (presumably the site's global rank -- confirm against
# the live markup).
RANK = re.compile(r'<img src="/images/icons/globe-sm.jpg" alt="Global" style="margin-bottom:-2px;"/>([^<]+)</div>')
# Captures the text between the "China Flag" icon and the closing </div>
# (presumably the site's rank within China -- confirm against the live markup).
CHINA = re.compile(r'<img class="dynamic-icon"[^>]+"China Flag"/>([^<]+)</div>')
# Matches a leading "www." so it can be stripped from a domain before lookup.
WWW = re.compile(r'^www\.')
def get_alexa(domain):
    """Fetch the Alexa site-info page for ``domain`` and print one CSV row.

    The row written to stdout has the form ``<domain>,<global>,<china>``
    followed by a newline.  The two values are the first RANK and CHINA
    matches found in the page, stripped of surrounding whitespace and with
    commas removed; ``-`` is written when a pattern does not match.

    A leading ``www.`` is removed for the lookup only; the row echoes
    ``domain`` exactly as it was passed in.

    Errors raised while opening or reading the URL propagate to the caller.
    """
    bare_domain = WWW.sub('', domain)
    response = urllib.urlopen('http://www.alexa.com/siteinfo/' + bare_domain)
    try:
        html = response.read()
    finally:
        # Release the connection even if read() fails part-way through.
        response.close()

    fields = [domain]
    for pattern in (RANK, CHINA):
        matches = pattern.findall(html)
        if matches:
            # Drop commas so the value cannot split into extra CSV columns.
            fields.append(matches[0].strip().replace(',', ''))
        else:
            fields.append('-')

    # Build the complete row first and emit it with a single write call, as
    # this function is invoked from several worker threads at once.
    sys.stdout.write(','.join(fields) + '\n')
class Alexa(threading.Thread):
    """Worker thread that looks up domains taken from a shared queue."""

    def __init__(self, queue):
        """Remember ``queue``, the queue of domain strings to consume."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process queued domains forever.

        Each domain is passed to ``get_alexa``.  On ``socket.error`` the
        worker sleeps for two seconds and puts the domain back on the queue
        for another attempt (retries are unbounded).  Any other exception
        is reported on stderr and the domain is dropped.  ``task_done()`` is
        called exactly once for every item taken from the queue.
        """
        while True:
            domain = self.queue.get()
            try:
                get_alexa(domain)
            except socket.error:
                # Back off briefly, then re-queue.  The put() happens before
                # the task_done() in ``finally`` so that queue.join() cannot
                # return while the retry is still outstanding.
                time.sleep(2)
                self.queue.put(domain)
            except Exception as exc:
                # Do not lose failures silently: say which domain was skipped.
                sys.stderr.write('%s: lookup failed: %r\n' % (domain, exc))
            finally:
                self.queue.task_done()
# Work queue shared by all worker threads; each item is one domain string.
queue = Queue.Queue()

# Start 20 workers.  They are daemon threads because run() never returns;
# this lets the process exit once the main thread is finished.
for _ in xrange(20):
    t = Alexa(queue)
    t.setDaemon(True)
    t.start()

# Enqueue one domain per non-blank input line; ``with`` closes the file.
with open('./input.txt') as input_file:
    for line in input_file:
        domain = line.strip()
        if domain:
            # Blank lines would otherwise trigger a lookup of an empty domain.
            queue.put(domain)

# Block until every queued domain has been marked done by a worker.
queue.join()
# Stray non-code text, kept as a comment: "No comments yet."