Python


Install

wget -q http://peak.telecommunity.com/dist/ez_setup.py
sudo python ez_setup.py

wget http://www.python.org/ftp/python/2.6.6/Python-2.6.6.tar.bz2

easy_install -m suds
#/path/to/site-packages/easy-install.pth
#/path/to/site-packages/suds.egg

sqlite3 on RHEL/CentOS with python 2.4.x

yum install python-devel
easy_install pysqlite
# Prefer the standard-library sqlite3 module; fall back to the separately
# installed pysqlite package, exposing it under the same name.
try:
  import sqlite3
except ImportError:
  # Only a missing module should trigger the fallback; any other error
  # raised while importing sqlite3 is a real problem and must surface.
  from pysqlite2 import dbapi2 as sqlite3

Unicode

Python 2.x 的 Unicode 处理不太友好,下面的模板可以有效解决这个问题。注意:源文件本身的编码也要改成 UTF-8!

# -*- coding: utf-8 -*-

# Set UTF-8 as the interpreter-wide default string encoding.
import sys
# NOTE(review): sys is reloaded before the call below, presumably because
# Python 2's site module removes sys.setdefaultencoding at startup -- confirm
# against the `site` documentation. The order of these two lines matters.
reload(sys)
sys.setdefaultencoding('utf-8')

# your code goes here

在以上的模板代码基础上写你自己的代码, 基本上可以解决中文字符串的连接,比较等操作

另,也可直接修改库目录下的 site.py 文件,把 Python 的默认字符串编码从 ascii 改为 utf-8,效果与上述模板相同。注意:下面的 diff 是以修改后的 site.py 为基准、与原始文件 site_old.py 对比生成的,所以以 - 开头的行是修改后的内容,以 + 开头的行是原始内容。

--- site.py     Wed Nov 16 10:46:20 2011
+++ site_old.py Tue Mar  8 09:43:22 2011
@@ -488,7 +488,7 @@
     """Set the string encoding used by the Unicode implementation.  The
     default is 'ascii', but if you're willing to experiment, you can
     change this."""
-    encoding = "utf-8" # Default value set by _PyUnicode_Init()
+    encoding = "ascii" # Default value set by _PyUnicode_Init()
     if 0:
         # Enable to support locale aware default string encodings.
         import locale

特别注意,如果你从外部获取数据(比如从文件读入),请确保你的外部数据源也是使用 UTF-8 进行编码的。

string.decode('cp936') # decode cp936-encoded bytes into a unicode object
string.encode('cp936') # encode to cp936 bytes (NOTE(review): a byte string is presumably first decoded with the default encoding, i.e. utf-8 under the template above -- confirm)
isinstance(u'string', unicode) # True when the value is a unicode object rather than a byte string

Example

round(3.1415926, 2)  # round to 2 decimal places -> 3.14
divmod(15, 6)  # (quotient, remainder) -> (2, 3)
ord('a')  # character -> integer code: 97
chr(97)  # integer code -> character: 'a'
# Hello World, printed in several equivalent ways
print 'Hello World'  # Python 2 print statement
print('Hello World')  # parenthesized form with a single argument
s = 'Hello World'
print(s)
hw = ('Hello', 'World')
print '%s %s' % hw  # %-formatting fed from an existing tuple
print '%s %s' % ('Hello', 'World')  # %-formatting with an inline tuple
print '{0} {1}'.format('Hello', 'World')  # str.format with positional fields

# A dict (name -> quantity); the original literal was a set, which has no
# .items() method. The quantities are sample values.
fruits = {'apple': 3, 'pear': 5}
# sorted() returns a new list of (key, value) pairs ordered by key and works
# whether .items() returns a list or a view.
items = sorted(fruits.items())

爬虫

# Log in to verycd.com with a form POST, going through a local HTTP proxy and
# keeping cookies in an in-memory jar.
import urllib, urllib2, cookielib
# Route http:// requests through the proxy listening on 127.0.0.1:8000.
proxy_support = urllib2.ProxyHandler({'http':'http://127.0.0.1:8000'})
# Store cookies received from the server in a CookieJar for later requests.
cookie_support= urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler)
# Install the opener globally so the urllib2.urlopen() call below uses it.
urllib2.install_opener(opener)
# URL-encoded login form body.
# NOTE(review): `fk` is not defined anywhere in this snippet; it must be
# assigned before this point -- confirm where its value comes from.
postdata=urllib.urlencode({
    'username':'XXXXX',
    'password':'XXXXX',
    'continueURI':'http://www.verycd.com/',
    'fk':fk,
    'login_submit':'登录'
})
# Request headers sent with the login request (browser User-Agent and Referer).
headers = {
    'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
    'Referer':'http://www.verycd.com/'
}
# Build the request with the form body and headers attached.
req = urllib2.Request(
    url = 'http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data = postdata,
    headers = headers
)
# Send the request and read the whole response body into memory.
result = urllib2.urlopen(req).read()
from threading import Thread
from Queue import Queue
from time import sleep
# q is the task queue shared by all worker threads
# NUM is the total number of concurrent worker threads
# JOBS is the number of tasks to enqueue
q = Queue()
NUM = 2
JOBS = 10
# The actual handler: processes one single task.
def do_somthing_using(arguments):
    """Handle one task; this placeholder just prints the task payload.

    arguments -- the item taken from the queue.
    """
    # Parenthesized single-argument print behaves the same as the print
    # statement and matches the print(...) form used elsewhere in this file.
    print(arguments)
# Worker loop run by each thread: keeps taking tasks off the queue and
# processing them.
def working():
    """Consume tasks from the shared queue `q` forever.

    Each task is passed to do_somthing_using(), followed by a one second
    pause. The task is always acknowledged with q.task_done(), even when the
    handler fails, so that q.join() in the main thread cannot block forever
    on a task that raised.
    """
    import traceback

    while True:
        arguments = q.get()
        try:
            do_somthing_using(arguments)
            sleep(1)
        except Exception:
            # Report the failure but keep this worker alive; a dead worker
            # would leave the remaining tasks unprocessed.
            traceback.print_exc()
        finally:
            q.task_done()
# Start NUM worker threads that wait on the queue
for i in range(NUM):
    t = Thread(target=working)
    # Marked as daemon before start(); the workers loop forever, so the
    # script relies on q.join() below rather than on joining the threads.
    t.setDaemon(True)
    t.start()
# Put JOBS tasks on the queue
for i in range(JOBS):
    q.put(i)
# Wait until every queued task has been marked done
q.join()

pexpect

#!/usr/bin/python
 
import pexpect
import sys
import time
import os
 
def ssh_cmd(ip, user, pwd, cmds):
  """Log in to a host over ssh and send it a list of commands.

  ip   -- host to connect to.
  user -- login name.
  pwd  -- password sent when the 'Password:' prompt appears.
  cmds -- comma-separated commands; each one is sent as its own line.

  Returns ip. An unexpected end of the ssh session (pexpect.EOF) is treated
  as "done with this host" rather than as an error; the child process is
  closed in every case.
  """
  foo = pexpect.spawn('ssh %s@%s' % (user, ip))
  try:
    index = foo.expect(['Password:', 'yes/no'])

    if index == 1:
      # Unknown host key: accept it, then wait for whatever comes next --
      # the password prompt, or the shell prompt if no password is needed.
      foo.sendline('yes')
      index = foo.expect(['Password:', '~ #'])

    if index == 0:
      foo.sendline(pwd)
      foo.expect('~ #')

    for cmd in cmds.split(","):
      foo.sendline(cmd)

  except pexpect.EOF:
    # The session ended early; nothing more can be sent to this host.
    pass
  finally:
    # Always release the child process (the original success path referenced
    # foo.close without calling it).
    foo.close()
  return ip
 
# Read the whole server list; each non-empty line has the form
# ip:user:password:cmd1,cmd2,...
file = open('serverlist.txt')
try:
  hosts = file.read()
finally:
  file.close()

# Process the hosts only after the file was read successfully. Inside the
# `finally` clause a failed read would reach this loop with `hosts` undefined
# and hide the real error behind a NameError.
for host in hosts.split("\n"):
  if host:
    # Split on the first three colons only, so the command field may itself
    # contain ':' characters.
    ip, user, pwd, cmds = host.split(":", 3)
    print(ssh_cmd(ip, user, pwd, cmds))

dir

def PrintObjectProperties(object, spacing=25):
    """Print the public, non-callable attributes of `object`, one per line.

    object  -- any object or class to inspect.
    spacing -- column width used to pad the attribute name and its type.

    Each line shows the attribute name, its type and its value. When the
    attribute is a `property` descriptor with a docstring (as seen when
    inspecting a class), the docstring is shown instead of the descriptor.
    """
    rows = []
    for name in dir(object):
        if name.startswith('_'):
            continue
        # Fetch each attribute once: the getter may be expensive or have
        # side effects.
        value = getattr(object, name)
        if callable(value):
            continue
        # isinstance() instead of comparing the type's repr, which differs
        # between Python versions and misses property subclasses.
        if isinstance(value, property) and value.__doc__:
            shown = value.__doc__
        else:
            shown = value
        rows.append("%s %s %s" % (name.ljust(spacing),
                                  str(type(value)).ljust(spacing),
                                  shown))
    print("[%s's Property List:]:" % str(object))
    print("\n".join(rows))

def PrintObjectMethods(object, spacing=25):
    """Print the public callable attributes of `object` with their docstrings.

    object  -- any object or class to inspect.
    spacing -- column width used to pad the method name.
    """
    # Collect the names first: public (no leading underscore) and callable.
    public_callables = []
    for name in dir(object):
        if name.startswith('_'):
            continue
        if callable(getattr(object, name)):
            public_callables.append(name)

    print("[%s's Methods List:]:" % str(object))

    # One output row per method: padded name followed by its docstring.
    rows = []
    for name in public_callables:
        doc = getattr(object, name).__doc__
        rows.append("%s %s" % (name.ljust(spacing), doc))
    print("\n".join(rows))

cx_oracle

从oracle网站下载instantclient-basic-nt-11.2.0.2.0.zip解压到D:\oracle。不要下载lite版本,不支持中文字符集。

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Make UTF-8 the interpreter-wide default string encoding.
import sys
# NOTE(review): sys is reloaded before the call below, presumably because
# Python 2's site module removes sys.setdefaultencoding at startup -- confirm
# against the `site` documentation. The order of these two lines matters.
reload(sys)
sys.setdefaultencoding('utf-8')

import os
# Client-side language/territory/character set for the Oracle session; set
# before cx_Oracle is imported below.
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
# Put the Instant Client directory at the front of PATH instead of replacing
# PATH, so every other directory on it stays reachable. The raw string keeps
# the backslash literal.
os.environ['PATH'] = os.pathsep.join(
    filter(None, [r'D:\oracle', os.environ.get('PATH')]))

import cx_Oracle

# your code goes here

JSON format

python -m json.tool data.json

默认直接输出 unicode 字符(如中文),而不是 \uXXXX 转义序列。注意:下面的 diff 以修改后的 tool.py 为基准,以 - 开头的行是修改后的内容,以 + 开头的行是原始内容。

--- tool.py     Wed Nov 16 11:01:06 2011
+++ tool_old.py Tue Mar  8 09:43:16 2011
@@ -29,7 +29,7 @@
         obj = json.load(infile)
     except ValueError, e:
         raise SystemExit(e)
-    json.dump(obj, outfile, sort_keys=True, indent=4, ensure_ascii=False)
+    json.dump(obj, outfile, sort_keys=True, indent=4)
     outfile.write('\n')

Python IMAP 收取邮件

# -*- coding: utf-8 -*-

# Make UTF-8 the interpreter-wide default string encoding.
import sys
# NOTE(review): sys is reloaded before the call below, presumably because
# Python 2's site module removes sys.setdefaultencoding at startup -- confirm
# against the `site` documentation. The order of these two lines matters.
reload(sys)
sys.setdefaultencoding('utf-8')

import imaplib
from email import message_from_string
from email.header import decode_header

conn = imaplib.IMAP4('imap.qq.com')
conn.login('username', 'password')
conn.select()
typ, data = conn.search(None, 'UnSeen')
for uid in data[0].split():
    msgdata = conn.fetch(uid, "(RFC822)")
    msg = message_from_string(msgdata[1][0][1])
    print '时间: ', msg['Date']
    print '来自: ',
    print ' '.join(x[0] for x in decode_header(msg['From'].replace('"', '')))
    print '发给: ',
    print ' '.join(x[0] for x in decode_header(msg['To'].replace('"', '')))
    # print msg['Message-ID']
    print '主题: ', decode_header(msg['Subject'])[0][0]
    print '正文:'
    for part in msg.walk():
        if not part.is_multipart():
            name = part.get_param('name')
            if not name:
                print part.get_payload(decode=True)