程式人生 > Python讀取複雜CSV檔案

Python讀取複雜CSV檔案

class ReadCSV(object):
    """Cursor-style reader for CSV files whose quoted cells may span lines.

    The whole file is parsed in the constructor into ``self.final``, a list
    of rows where each row is a list of cell strings.  Row 0 is treated as
    the header row by ``getValue``.  ``self.anchor`` is the index of the
    current row and is moved with ``next`` / ``reset``.

    Parsing rules (hand-rolled, not the ``csv`` module):
      * a physical line is joined with the following ones until the number
        of double quotes seen so far is even, so a quoted cell may contain
        line breaks;
      * a row is split on commas, and pieces are re-joined until their
        double quotes balance, so a quoted cell may contain commas;
      * the outermost pair of double quotes in a cell is removed and every
        doubled quote (``""``) becomes a single ``"``.

    Rows and cursor are stored per instance, so several readers can be used
    at the same time without sharing rows or position.
    """

    def __init__(self, path, head=0):
        """Read and parse the CSV file at ``path``.

        path -- file system path of the CSV file to load.
        head -- unused; accepted only so existing callers keep working.

        Raises whatever ``open`` raises when the file cannot be read.
        """
        # Local import keeps the class usable even when the surrounding
        # module does not import ``re`` itself.
        import re

        # Per-instance state: a class-level list here would be shared by
        # (and keep growing across) every reader ever constructed.
        self.anchor = 0
        self.final = []

        # ``with`` closes the handle even if reading fails; ``open`` exists
        # on both Python 2 and Python 3, unlike ``file``.
        with open(path, 'r') as channel:
            data = channel.readlines()

        # Greedy on purpose: strips the first and the last quote of a cell.
        rm_quote = re.compile(r'"([\s\S]*)"')

        quote = 0
        row_tmp = ''
        for line in data:
            quote += line.count('"')
            if quote % 2:
                # Inside an open quoted cell: keep the line break and wait
                # for the line that closes the quote.
                row_tmp += line
                continue
            quote = 0
            row_tmp += line.strip('\n')
            self.final.append(self._split_row(row_tmp, rm_quote))
            row_tmp = ''
        # NOTE: lines left in ``row_tmp`` here belong to a quoted cell that
        # is never closed before end of file; they are discarded.

    @staticmethod
    def _split_row(row, rm_quote):
        """Split one logical row into its unquoted cell strings."""
        cells = []
        pending = ''
        quote = 0
        for piece in row.split(','):
            quote += piece.count('"')
            if quote % 2:
                # The comma was inside a quoted cell: put it back.
                pending += piece + ','
                continue
            pending = rm_quote.sub(r'\1', pending + piece)
            cells.append(pending.replace('""', '"'))
            pending = ''
            quote = 0
        return cells

    def getCell(self, row, col):
        """Return the cell at zero-based ``row`` / ``col``.

        Raises IndexError when either index is out of range.
        """
        return self.final[row][col]

    def getValue(self, title):
        """Return the current row's cell in the column headed ``title``.

        The column is located by looking ``title`` up in row 0.  Raises
        ValueError when no header cell equals ``title`` and IndexError when
        no rows were parsed.
        """
        return self.getCell(self.anchor, self.final[0].index(title))

    def next(self):
        """Move the cursor to the following row; do nothing on the last row."""
        if not self.done():
            self.anchor += 1

    def done(self):
        """Return True when the cursor is on the last row (or there are no rows)."""
        # ``>=`` rather than ``==`` so that an empty file reports True
        # instead of making ``while not reader.done()`` loop forever.
        return self.anchor >= len(self.final) - 1

    def reset(self):
        """Move the cursor back to row 0 (the header row)."""
        self.anchor = 0

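CSV file (shown here as a rendered table; in the raw file the cells are comma-separated, and a cell containing commas or line breaks, such as the Address cells, is wrapped in double quotes with inner quotes doubled):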

Name Age Address
Zhang_san 13 Address1:
1. aaaaa
2. aaad "bbbb",
3. bacad,
adfa"aaa".
Li_si 14 Address2, xxxx
aaaa"
bbbbb".,
Wang_wu 15 Address3

Example:

# Parse the sample file; row 0 holds the column titles.
p = ReadCSV('c:/csvfile.csv')
# Row 1, column 2 is the multi-line Address cell of the first data row.
# print(...) with a single argument runs on both Python 2 and Python 3.
print(p.getCell(1,2))
output:

Address1:
1. aaaaa
2. aaad "bbbb",
3. bacad,
adfa"aaa".

# Assumes the cursor is still on row 0 (the header row).
# One step forward selects the first data row.
p.next()
print(p.getValue('Name'))
# A second step selects the next data row; getValue looks the column up by its title.
p.next()
print(p.getValue('Address'))
output:

Zhang_san

Address2, xxxx
aaaa"
bbbbb".,

# done() only returns a flag; print it so the result is visible when run as a script.
print(p.done())
# next() never moves past the last row, so surplus calls are harmless.
p.next()
p.next()
p.next()
print(p.done())
output:

False

True

# Start from the header row so the output below holds even if earlier calls moved the cursor.
p.reset()
p.next()
p.next()
print(p.getValue('Name'))
# reset() returns to row 0, where each cell is its own column title.
p.reset()
print(p.getValue('Name'))
output:

Li_si

Name