文章标签 » python

蛇年话Python

又做了个Python分享,一批又一批的同事成了前同事,Python还是要继续忽悠的,呵呵~,分享之前还专门发到v2ex征求大家的意见,反响还不错,所以信心也足了些。40页的ppt,花了近2个小时分享完毕,主要两个主题:Python好学易用,Python是现在进行时。

PPT:http://emptyhua.github.com/python2013/
项目:https://github.com/emptyhua/python2013

这也算间接为Python社区做了小小贡献。

backup:展开文件路径中的通配符

python:

import glob
# Expand the wildcard passed as the first command-line argument, walk every
# match, and hand each stylesheet and each non-minified script to
# process_static_file.
for matched_path in glob.iglob(sys.argv[1]):
    for parent, _subdirs, names in os.walk(matched_path):
        for name in names:
            is_plain_js = name.endswith('.js') and not name.endswith('.min.js')
            if is_plain_js or name.endswith('.css'):
                process_static_file(os.path.join(parent, name))

php:

// Print the name and size of every file matching *.txt, one per line.
$matches = glob("*.txt");
foreach ($matches as $filename) {
    $bytes = filesize($filename);
    echo "$filename size " . $bytes . "\n";
}

node.js:
https://github.com/isaacs/node-glob

var glob = require("glob")
 
// Match the pattern "**/*.js" and deliver the outcome to the callback.
// options is optional
glob("**/*.js", options, function (er, files) {
  // files is an array of filenames.
  // If the `nonull` option is set, and nothing
  // was found, then files is ["**/*.js"]
  // er is an error object or null.
})

backup:更实用的大文件搜索

findbigfile.py

#!/usr/bin/env python
import sys, os
 
# Files larger than this many bytes (100 MB) are reported.
BIG_FILE_SIZE = 1024 * 1024 * 100
# A directory whose own entry size exceeds 1024 memory pages is reported and
# not descended into. os.sysconf reads the page size in-process, avoiding a
# shell round-trip to `getconf PAGE_SIZE` and the pipe it would leave open.
BIG_DIR_SIZE = os.sysconf('SC_PAGE_SIZE') * 1024
# Paths skipped entirely; normalized so they compare equal to the normalized
# paths built while walking.
EXCLUDES = [os.path.normpath(p) for p in ['/home/cdn/wpt_data']]

# Work list of paths still to inspect, seeded with the path given on the
# command line.
stack = [os.path.normpath(sys.argv[1])]
 
def format_size(s):
    """Render a byte count as a whole number of Bytes, KB, MB or GB.

    Units are binary (1 KB = 1024 bytes). The value is floored to the first
    unit in which it is below 1024; anything larger is expressed in GB.
    Floor division keeps the result exact on both Python 2 and Python 3,
    where ``/`` would produce a float.

    Args:
        s: Size in bytes.

    Returns:
        A string such as ``'512 Bytes'``, ``'3 KB'``, ``'100 MB'`` or
        ``'2 GB'``.
    """
    for unit in ('Bytes', 'KB', 'MB'):
        if s < 1024:
            return '%d %s' % (s, unit)
        s //= 1024
    return '%d GB' % s
 
# Iterative depth-first walk: pop a path, classify it, and push the children
# of ordinary directories back onto the stack.
while stack:
    check_file = stack.pop()

    # Skip excluded paths, and never follow symbolic links.
    if check_file in EXCLUDES or os.path.islink(check_file):
        continue

    is_file = os.path.isfile(check_file)
    if not is_file and not os.path.isdir(check_file):
        # Neither a regular file nor a directory (or it no longer exists).
        continue

    try:
        s = os.path.getsize(check_file)
    except OSError:
        # The path vanished or cannot be stat'ed; move on instead of crashing.
        continue

    if is_file:
        if s > BIG_FILE_SIZE:
            print('BIG FILE:%s\t%s' % (check_file, format_size(s)))
        continue

    # A directory whose own size is over the threshold is reported and not
    # listed (presumably because it holds a very large number of entries).
    if s > BIG_DIR_SIZE:
        print('BIG DIR:%s\t%s' % (check_file, format_size(s)))
        continue

    try:
        children = os.listdir(check_file)
    except OSError:
        # Unreadable directory: best-effort scan, so just skip it.
        continue
    for f in children:
        stack.append(os.path.join(check_file, f))

backup:alexa query

#!/usr/bin/env python
 
import sys
import urllib
import re
import Queue
import threading
import time
import socket
 
# Captures the text between the "Global" globe icon and the closing </div>.
# The dot in the image file name is escaped so it only matches a literal '.'.
RANK = re.compile(r'<img src="/images/icons/globe-sm\.jpg" alt="Global" style="margin-bottom:-2px;"/>([^<]+)</div>')
# Captures the text between the "China Flag" icon and the closing </div>.
CHINA = re.compile(r'<img class="dynamic-icon"[^>]+"China Flag"/>([^<]+)</div>')
# Matches a leading "www." so it can be stripped from a domain.
WWW = re.compile(r'^www\.')
def get_alexa(domain):
    """Fetch the Alexa page for *domain* and write one CSV line of its ranks.

    The line written to stdout is ``domain,<global>,<china>``. Each rank is
    the first match of the corresponding pattern with surrounding whitespace
    and thousands separators removed, or ``-`` when the pattern does not
    match the page.

    Args:
        domain: Host name to look up. A leading ``www.`` is dropped for the
            request but the name is written to the output unchanged.

    Raises:
        Any error raised while opening or reading the URL is propagated to
        the caller.
    """
    d = WWW.sub('', domain)
    response = urllib.urlopen('http://www.alexa.com/siteinfo/' + d)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    fields = [domain]
    for pattern in (RANK, CHINA):
        found = pattern.search(html)
        if found:
            fields.append(found.group(1).strip().replace(',', ''))
        else:
            fields.append('-')
    # Emit the whole line in a single write call.
    sys.stdout.write(','.join(fields) + '\n')
 
 
class Alexa(threading.Thread):
    """Worker thread that runs get_alexa on every domain taken from a queue."""

    def __init__(self, queue):
        """Store the shared work queue.

        Args:
            queue: Queue-like object providing get(), put() and task_done().
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process domains forever; the loop never returns on its own."""
        while True:
            domain = self.queue.get()
            try:
                get_alexa(domain)
            except socket.error:
                # Back off briefly, then requeue the domain for another try.
                # The put happens before task_done (in finally), so the
                # queue's unfinished count never drops to zero in between.
                time.sleep(2)
                self.queue.put(domain)
            except Exception as exc:
                # Give up on this domain but say so, instead of dropping it
                # silently; stdout is left untouched for the CSV output.
                sys.stderr.write('%s: lookup failed (%r)\n' % (domain, exc))
            finally:
                self.queue.task_done()
 
# Start a pool of 20 daemon workers, feed them one domain per input line,
# then block until every queued domain has been marked done.
queue = Queue.Queue()
for i in range(20):
    t = Alexa(queue)
    # Daemon threads do not keep the process alive once the main thread ends.
    t.daemon = True
    t.start()

with open('./input.txt') as domains:
    for line in domains:
        domain = line.strip()
        # Blank lines would otherwise be looked up as an empty domain.
        if domain:
            queue.put(domain)
queue.join()

backup:unbuffer implementation in python

#!/usr/bin/env python
import os,sys,pty
# Reopen this process's stdout/stderr with buffering disabled so relayed
# output is written out immediately.
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', 0)

# Fork with the child attached to a new pseudo-terminal. pty.fork() raises
# OSError on failure rather than returning a negative pid, so no explicit
# "pid < 0" branch is needed.
pid, fd = pty.fork()
if pid == 0:
    # Child: replace this process with the requested command.
    os.execvp(sys.argv[1], sys.argv[1:])
else:
    # Parent: relay everything the child writes to the pty. os.read returns
    # as soon as any data is available, so a larger chunk size adds no delay
    # while avoiding one system call per byte.
    while True:
        try:
            s = os.read(fd, 4096)
        except OSError:
            # Reading fails once the child side of the pty has been closed.
            break
        if not s:
            break
        sys.stdout.write(s)
    # Reap the child so it does not linger as a zombie.
    os.waitpid(pid, 0)