1. 程式人生 > >Python常用的文件讀寫操作和字符串操作

Python常用的文件讀寫操作和字符串操作

dir info load char 編碼 lines resolve values ror


文件讀寫操作

fileUtils.py

# -*- coding: utf-8 -*-

import os


def getFileList(dir, fileList=None):
    """
    Recursively collect the paths of all files found under a path.

    param dir: file or directory to walk (a plain file yields just itself)
    param fileList: optional list that results are appended to; a new list
                    is created when it is omitted
    return fileList: list of file paths
    """
    # Create the accumulator per call. A ``fileList=[]`` default is built
    # once at definition time and shared, so results from earlier calls
    # would leak into later, unrelated calls.
    if fileList is None:
        fileList = []
    if os.path.isfile(dir):
        fileList.append(dir)
    elif os.path.isdir(dir):
        for s in os.listdir(dir):
            # To ignore certain folders, use the following:
            # if s == "xxx":
            #     continue
            getFileList(os.path.join(dir, s), fileList)
    # Paths that are neither a file nor a directory contribute nothing.
    return fileList


def readStrFromFile(filePath):
    """
    Read the whole content of a file in one call.

    The file is opened in binary mode, so the content is returned exactly
    as stored on disk, without newline translation.

    param filePath: path of the file to read
    return: the complete file content
    """
    handle = open(filePath, "rb")
    try:
        return handle.read()
    finally:
        # Release the descriptor even if the read fails.
        handle.close()


def readLinesFromFile(filePath):
    """
    Read a file and return its content split into lines.

    The file is opened in binary mode; each returned line keeps its
    trailing line terminator.

    param filePath: path of the file to read
    return: list of lines in file order (empty list for an empty file)
    """
    with open(filePath, "rb") as source:
        return source.readlines()


def writeStrToFile(filePath, string):
    """
    Write a string to a file, replacing any existing content.

    param filePath: path of the file to write
    param string: content to write; text is encoded as UTF-8, byte strings
                  are written unchanged
    """
    # The file is opened in binary mode, which rejects text on Python 3.
    # ``type(u"")`` is the text type on both Python 2 and Python 3.
    if isinstance(string, type(u"")):
        string = string.encode("utf-8")
    with open(filePath, "wb") as f:
        f.write(string)


def appendStrToFile(filePath, string):
    """
    Append a string to the end of a file, creating the file if needed.

    param filePath: path of the file to append to
    param string: content to append; text is encoded as UTF-8, byte strings
                  are written unchanged
    """
    # The file is opened in binary mode, which rejects text on Python 3.
    # ``type(u"")`` is the text type on both Python 2 and Python 3.
    if isinstance(string, type(u"")):
        string = string.encode("utf-8")
    with open(filePath, "ab") as f:
        f.write(string)


def dumpToFile(filePath, content):
    """
    Serialize an object with pickle and store it in a local file.

    Any existing file at the path is overwritten.

    param filePath: path of the file to write
    param content: object to save (list, dict, tuple, ...)
    """
    import pickle

    sink = open(filePath, "wb")
    try:
        pickle.dump(content, sink)
    finally:
        # Close the file whether or not pickling succeeded.
        sink.close()


def loadFromFile(filePath):
    """
    Load a pickled object back from a local file.

    param filePath: path of a file containing pickled data
    return content: the deserialized object (e.g. list, dict, tuple, ...)
    """
    import pickle

    # NOTE: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    # Pickle data is binary, so the file must be opened with "rb". Text
    # mode makes pickle.load fail on Python 3 and can corrupt the stream
    # through newline translation on Windows.
    with open(filePath, "rb") as f:
        content = pickle.load(f)
    return content


字符串操作

zhuanma.py

# -*- coding: utf-8 -*-
import os
import sys

# Resource loader: open a file located relative to this module, through
# pkg_resources when it is installed and straight from disk otherwise.
try:
    import pkg_resources
    get_module_res = lambda *res: pkg_resources.resource_stream(
        __name__, os.path.join(*res))
except ImportError:
    # The mode must be an ordinary ASCII-quoted string; typographic quotes
    # around it are a syntax error.
    get_module_res = lambda *res: open(
        os.path.normpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__), *res)),
        'rb')

# True when running under Python 2.
PY2 = sys.version_info[0] == 2

default_encoding = sys.getfilesystemencoding()

# Python 2/3 compatibility aliases: one text type, one tuple of string
# types, and iterator-returning dict helpers under the same names.
if PY2:
    text_type = unicode
    string_types = (str, unicode)

    iterkeys = lambda d: d.iterkeys()
    itervalues = lambda d: d.itervalues()
    iteritems = lambda d: d.iteritems()

else:
    text_type = str
    string_types = (str,)
    # Python 3 has no xrange; range is already lazy.
    xrange = range

    iterkeys = lambda d: iter(d.keys())
    itervalues = lambda d: iter(d.values())
    iteritems = lambda d: iter(d.items())

def strdecode(sentence):
    """
    Convert a byte string to text; values that are already text pass through.

    UTF-8 is tried first. If that fails the bytes are decoded as GBK, with
    undecodable bytes dropped.

    param sentence: text or encoded byte string
    return: the value as ``text_type``
    """
    if not isinstance(sentence, text_type):
        try:
            # Codec names must be ordinary ASCII-quoted strings; typographic
            # quotes around them are a syntax error.
            sentence = sentence.decode('utf-8')
        except UnicodeDecodeError:
            sentence = sentence.decode('gbk', 'ignore')
    return sentence

def resolve_filename(f):
    """
    Return a display name for a file-like object.

    param f: any object, typically an open file
    return: ``f.name`` when the attribute exists, otherwise ``repr(f)``
    """
    # A private sentinel distinguishes "no name attribute" from a name
    # attribute whose value happens to be None or empty.
    absent = object()
    label = getattr(f, "name", absent)
    if label is absent:
        return repr(f)
    return label


stringUtils.py

# -*- coding: utf-8 -*-


import zhuanma


def jiema(string):
    """
    Decode a string to unicode text.

    Thin wrapper that delegates to ``zhuanma.strdecode``.

    param string: string to decode
    return: result of ``strdecode(string)``
    """
    from zhuanma import strdecode as decode_to_text
    decoded = decode_to_text(string)
    return decoded


def filterReturnChar(string):
    """
    Remove every carriage-return character from a string.

    :param string: input string
    :return: the string with all "\\r" characters removed
    """
    # Cut the text at each carriage return and glue the pieces back together.
    pieces = string.split("\r")
    return "".join(pieces)


def encodeUTF8(string):
    """
    Re-encode a string as UTF-8.

    The input is first passed through ``jiema`` and the result is then
    encoded as UTF-8.

    :param string: string to convert
    :return: the UTF-8 encoded string
    """
    text = jiema(string)
    return text.encode("utf-8")


def filterCChar(string):
    """
    Keep only the Chinese characters of a string.

    Characters in the range U+4E00 to U+9FA5 are kept, in their original
    order; everything else is dropped.

    :param string: string to filter
    :return: string made of the matched characters ("" when there are none)
    """
    import re

    pattern = re.compile(u"[\u4e00-\u9fa5]+", re.U)
    runs = pattern.findall(string)
    return "".join(runs)

Python常用的文件讀寫操作和字符串操作