文章标签 » python

蛇年话Python

又做了个Python分享,一批又一批的同事成了前同事,Python还是要继续忽悠的,呵呵~,分享之前还专门发到v2ex征求大家的意见,反响还不错,所以信心也足了些。40页的ppt,花了近2个小时分享完毕,主要两个主题:Python好学易用,Python是现在进行时。

PPT:http://emptyhua.github.com/python2013/
项目:https://github.com/emptyhua/python2013

这也算间接为Python社区做了小小贡献。

backup:展开文件路径中的通配符

python:

import glob
import os
import sys

# Expand the shell-style wildcard given as the first command-line argument
# and walk every directory tree it matches.
# NOTE: os.walk() yields nothing for a match that is a regular file, so only
# matched directories contribute files here.
for input_file in glob.iglob(sys.argv[1]):
    for root, dirs, files in os.walk(input_file):
        for name in files:
            # Process stylesheets and scripts, but skip already-minified
            # scripts (*.min.js).
            is_script = name.endswith('.js') and not name.endswith('.min.js')
            if is_script or name.endswith('.css'):
                process_static_file(os.path.join(root, name))

php:

// Print the name and size in bytes of every *.txt file in the current
// directory. glob() returns false on error, so fall back to an empty list
// instead of handing a non-array to foreach.
$matches = glob("*.txt");
foreach (($matches === false ? array() : $matches) as $filename) {
    echo "$filename size " . filesize($filename) . "\n";
}

node.js:
https://github.com/isaacs/node-glob

var glob = require("glob")

// Callback invoked with the result of the pattern expansion.
//   er    - an error object, or null
//   files - an array of filenames; if the `nonull` option is set and
//           nothing was found, it holds the pattern itself
var onMatches = function (er, files) {
}

// The options argument is optional.
glob("**/*.js", options, onMatches)

backup:更实用的大文件搜索

findbigfile.py

#!/usr/bin/env python
import sys, os

# Files larger than this many bytes (100 MB) are reported.
BIG_FILE_SIZE = 1024 * 1024 * 100
# Threshold for a directory's own size: 1024 memory pages.
# os.sysconf reads the page size in-process, which avoids spawning
# `getconf PAGE_SIZE` and leaving its pipe unclosed.
BIG_DIR_SIZE = os.sysconf('SC_PAGE_SIZE') * 1024
# Normalised paths that must never be scanned.
EXCLUDES = [os.path.normpath(p) for p in ['/home/cdn/wpt_data']]
# Work list of paths still to visit, seeded with the start directory.
# Falls back to the current directory when no argument is supplied instead
# of failing with IndexError.
stack = [os.path.normpath(sys.argv[1] if len(sys.argv) > 1 else '.')]

def format_size(s):
    """Render the byte count *s* as a whole number of Bytes, KB, MB or GB.

    The largest unit whose threshold *s* reaches is used, and any fractional
    part is dropped by the ``%d`` conversion.
    """
    divisor = 1
    for template in ('%d Bytes', '%d KB', '%d MB'):
        # Each unit covers values below the next power of 1024.
        if s < divisor * 1024:
            return template % (s / divisor)
        divisor *= 1024
    return '%d GB' % (s / divisor)

# Iterative depth-first scan: pop a path, report it if it is big, and push
# the children of ordinary directories back onto the work list.
while stack:
    check_file = stack.pop()

    # Skip excluded paths, and never follow symbolic links.
    if check_file in EXCLUDES or os.path.islink(check_file):
        continue

    if os.path.isfile(check_file):
        s = os.path.getsize(check_file)
        if s > BIG_FILE_SIZE:
            # Single-argument print(...) works on both Python 2 and 3.
            print('BIG FILE:%s\t%s' % (check_file, format_size(s)))
        continue

    if os.path.isdir(check_file):
        # This is the size of the directory entry itself, not of its contents.
        s = os.path.getsize(check_file)

        # Report an oversized directory and do not descend into it.
        if s > BIG_DIR_SIZE:
            print('BIG DIR:%s\t%s' % (check_file, format_size(s)))
            continue

        try:
            entries = os.listdir(check_file)
        except OSError:
            # Best effort: a directory that cannot be listed (for example
            # permission denied, or removed meanwhile) is skipped.
            continue
        stack.extend(os.path.join(check_file, f) for f in entries)

backup:alexa query

#!/usr/bin/env python

import sys
import urllib
import re
import Queue
import threading
import time
import socket

# NOTE(review): the RANK and CHINA patterns look truncated -- the HTML tags
# they originally contained appear to have been stripped when this listing
# was published (each literal was broken across lines). They are kept as
# found, with the line break written as \n; restore them from the original
# script before relying on the output.
RANK = re.compile(r'Global([^<]+)\n')
CHINA = re.compile(r']+"China Flag"/>([^<]+)\n')
# Strips a leading "www." from a domain name.
WWW = re.compile(r'^www\.')


def _first_match(pattern, html):
    """Return the first capture of *pattern* in *html*, or '-' if none.

    The captured text is stripped and has its commas removed.
    """
    found = pattern.findall(html)
    if not found:
        return '-'
    return found[0].strip().replace(',', '')


def get_alexa(domain):
    """Fetch the Alexa page for *domain* and write one CSV line to stdout.

    The line has the form ``domain,<RANK match>,<CHINA match>``, with ``-``
    standing in for a value whose pattern did not match.
    """
    d = WWW.sub('', domain)
    response = urllib.urlopen('http://www.alexa.com/siteinfo/' + d)
    try:
        html = response.read()
    finally:
        # Close the connection even when reading fails.
        response.close()
    fields = [domain, _first_match(RANK, html), _first_match(CHINA, html)]
    # A single write per line keeps output from different threads apart.
    sys.stdout.write(','.join(fields) + '\n')


class Alexa(threading.Thread):
    """Worker thread that looks up every domain taken from a shared queue."""

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            domain = self.queue.get()
            try:
                get_alexa(domain)
            except socket.error:
                # Back off, then re-queue the domain for another attempt.
                # The put() happens before task_done() so queue.join()
                # cannot return early. NOTE: retries are unlimited.
                time.sleep(2)
                self.queue.put(domain)
            except Exception as exc:
                # Best effort: report the failure on stderr (stdout carries
                # the CSV) and move on to the next domain.
                sys.stderr.write('%s: %s\n' % (domain, exc))
            finally:
                self.queue.task_done()


if __name__ == '__main__':
    queue = Queue.Queue()
    for i in xrange(20):
        t = Alexa(queue)
        # Daemon threads let the process exit once the queue is drained.
        t.daemon = True
        t.start()

    # Read one domain per line; the file is closed when the block ends.
    with open('./input.txt') as domains:
        for line in domains:
            name = line.strip()
            if name:
                # Blank lines are skipped rather than queried.
                queue.put(name)

    queue.join()

分类目录 : code
标签 : ,

backup:unbuffer implementation in python

#!/usr/bin/env python
import os,sys,pty
def _write_all(fd, data):
    """Write all of *data* to file descriptor *fd*, retrying short writes."""
    while data:
        written = os.write(fd, data)
        data = data[written:]


def main():
    """Run the command in sys.argv[1:] on a pseudo-terminal and relay its output.

    Output is copied to this process's stdout with os.write(), so no Python
    level buffering sits between the child and the consumer. The child's
    exit status becomes this process's exit status.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s command [args...]\n' % sys.argv[0])
        sys.exit(2)

    # pty.fork() signals failure by raising OSError, not by returning a
    # negative pid.
    try:
        pid, fd = pty.fork()
    except OSError:
        sys.stderr.write('fork error\n')
        sys.exit(1)

    # child
    if pid == 0:
        os.execvp(sys.argv[1], sys.argv[1:])

    # parent
    out_fd = sys.stdout.fileno()
    while True:
        try:
            # os.read returns as soon as any data is available, so a larger
            # request size does not delay output; it only avoids one system
            # call per byte.
            chunk = os.read(fd, 4096)
        except OSError:
            # Reading the pty master fails once the child side is closed.
            break
        if not chunk:
            break
        _write_all(out_fd, chunk)

    # Reap the child and pass its exit status on to the caller.
    _, status = os.waitpid(pid, 0)
    if os.WIFEXITED(status):
        sys.exit(os.WEXITSTATUS(status))
    if os.WIFSIGNALED(status):
        # Shell convention: 128 + signal number.
        sys.exit(128 + os.WTERMSIG(status))


if __name__ == '__main__':
    main()

分类目录 : code
标签 : , ,