需要的包有:
xlrd
此包用于从旧的Excel文件中读取数据和格式信息(即:.xls)
xlwt
此包用于将数据和格式信息写入旧的Excel文件(即:.xls)
openpyxl
用于读取和写入Excel 2010文件的推荐包(即:.xlsx)
xlsxwriter
用于编写数据,格式化信息,特别是Excel 2010格式的图表的替代软件包(即:.xlsx)
xlutils
该软件包收集需要xlrd和xlwt的实用程序,包括复制,修改或过滤现有excel文件的功能。此包提供用于处理的Excel文件的实用程序集合。由于这些实用程序可能需要XLRD和XLWT包中的一个或两个包,因此它们在这里被收集在一起,与两个包分开。这些实用程序在包中分为多个模块。
工具箱安装
xlrd 安装
pip install xlrd
xlwt 安装
pip install xlwt
openpyxl 安装
pip install openpyxl
xlsxwriter 安装
pip install XlsxWriter
xlutils 安装
pip install xlutils
Excel 文件内容结构
Excel 严格的来说是电子表格的一种实现程序,由于在电子表格领域处于垄断地位,我们一般把Excel文件等同于电子表格。
工作簿
工作簿是电子表格的集合。是计算和存储数据的文件,每一个工作簿都由多张工作表组成,用户可以在单个文件中管理各种不同类型的信息,默认情况下,一个工作簿包含3张工作表,分别为Sheet1,Sheet2和Sheet3,现在也不一定了。
工作表
工作表即一张 sheet 表
单元格
工作表中的小方格。
行
在Excel工作表最上方,列出了按照字母顺序排列的格式(A,B,C,D),叫做列。
列
在Excel工作表最左侧,列出了按照数字大小排列的格式(1,2,3,4),叫做行。
Excel其实和数据库的概念类似,数据表、表、记录、行、列、字段。
读取 Excel文件
xlrd 读取文件有如下2种方式
方式1
from xlrd import open_workbookworkbook = open_workbook('simple.xls')
方式2
from xlrd import open_workbookaString = open('simple.xls','rb').read()workbook = open_workbook(file_contents=aString)
读取每个工作表中的单元格的内容
from xlrd import open_workbookwb = open_workbook('simple.xls')for s in wb.sheets(): print('Sheet:',s.name) for row in range(s.nrows): values = [] for col in range(s.ncols): values.append(s.cell(row,col).value) print(''.join(values))
Book的常用属性和方法
from xlrd import open_workbookbook = open_workbook('simple.xls')print(book.nsheets) # 工作表总数print(book.sheet_by_index(2)) #根据index选择sheetprint(book.sheet_names) #list 工作表名称pring(book.sheet_by_name('表1')) #根据sheet name 选择 sheet
Book的其他属性
- codepage
- countries
- user_name
Sheet 的常用属性和方法
from xlrd import open_workbook,cellnamebook = open_workbook('odd.xls')sheet = book.sheet_by_index(0)print(sheet.name) #工作表 名print(sheet.nrows) #工作表 行数print(sheet.ncols) #工作表列数for row_index in range(sheet.nrows): for col_index in range(sheet.ncols): print(cellname(row_index,col_index),'-',) #cell 名 print(sheet.cell(row_index,col_index).value) #cell 值
Sheet 的去其他属性
- col_label_ranges
- row_label_ranges
- visibility
Sheet 的去其他方法
- row_slice(x,y)
- row_values(x,y)
- row_types(0,1,2)
- col_slice(x,y)
- col_values(x,y)
- col_types(0,1,2)
Cell 的常用属性和方法
from xlrd import open_workbook,XL_CELL_TEXTbook = open_workbook('odd.xls')sheet = book.sheet_by_index(1)cell = sheet.cell(0,0) #根据坐标获取一个 Cellprint(cell)print(cell.value) # cell 的 值print(cell.ctype==XL_CELL_TEXT #cell的类型for i in range(sheet.ncols): print sheet.cell_type(1,i),sheet.cell_value(1,i)
Cell 类型
- Text
- Number
- Date
- Boolean
- Boolean
- Empty
- Blank
from datetime import date,datetime,timefrom xlrd import open_workbook,xldate_as_tuplebook = open_workbook('types.xls')sheet = book.sheet_by_index(0)date_value = xldate_as_tuple(sheet.cell(3,2).value,book.datemode)print(datetime(*date_value),date(*date_value[:3]))# 日期datetime_value =xldate_as_tuple(sheet.cell(3,3).value,book.datemode)print(datetime(*datetime_value))# 时间time_value = xldate_as_tuple(sheet.cell(3,4).value,book.datemode)print(time(*time_value[3:]))print(datetime(*time_value))
turn error codes into error messages:
from xlrd import open_workbook,error_text_from_codebook = open_workbook('types.xls')sheet = book.sheet_by_index(0)print(error_text_from_code[sheet.cell(5,2).value])print(error_text_from_code[sheet.cell(5,3).value])from xlrd import open_workbook,empty_cellprint empty_cell.valuebook = open_workbook('types.xls')sheet = book.sheet_by_index(0)empty = sheet.cell(6,2)blank = sheet.cell(7,2)print empty is blank, empty is empty_cell, blank is empty_cellbook = open_workbook('types.xls',formatting_info=True)sheet = book.sheet_by_index(0)empty = sheet.cell(6,2)blank = sheet.cell(7,2)print empty.ctype,repr(empty.value)print blank.ctype,repr(blank.value)
如何处理大型 Excel 文件
from xlrd import open_workbook# 打开 on_demandbook = open_workbook('simple.xls',on_demand=True)for name in book.sheet_names(): if name.endswith('2'): sheet = book.sheet_by_name(name) print(sheet.cell_value(0,0)) book.unload_sheet(name) #不加载 某个 sheet
写入 Excel 文件
一个简小的示例
from tempfile import TemporaryFilefrom xlwt import Workbookbook = Workbook() #新建工作簿sheet1 = book.add_sheet('Sheet 1') #添加工作表1book.add_sheet('Sheet 2') #添加工作表2sheet1.write(0,0,'A1') #添加cellsheet1.write(0,1,'B1')row1 = sheet1.row(1) # 读取 cellrow1.write(0,'A2') #添加 cellrow1.write(1,'B2')sheet1.col(0).width = 10000 #列宽sheet2 = book.get_sheet(1)sheet2.row(0).write(0,'Sheet 2 A1')sheet2.row(0).write(1,'Sheet 2 B1')sheet2.flush_row_data() #flush 数据sheet2.write(1,0,'Sheet 2 A3')sheet2.col(0).width = 5000sheet2.col(0).hidden = Truebook.save('simple.xls') #保存文件# book.save(TemporaryFile())
工作簿编码格式设定
from xlwt import Workbookbook = Workbook(encoding='utf-8')
添加主流cell数据类型的示例
from datetime import date,time,datetimefrom decimal import Decimalfrom xlwt import Workbook,Stylewb = Workbook()ws = wb.add_sheet('Type examples')ws.row(0).write(0,u'xa3')ws.row(0).write(1,'Text')ws.row(1).write(0,3.1415)ws.row(1).write(1,15)ws.row(1).write(2,265L)ws.row(1).write(3,Decimal('3.65'))ws.row(2).set_cell_number(0,3.1415)ws.row(2).set_cell_number(1,15)ws.row(2).set_cell_number(2,265L)ws.row(2).set_cell_number(3,Decimal('3.65'))ws.row(3).write(0,date(2009,3,18))ws.row(3).write(1,datetime(2009,3,18,17,0,1))ws.row(3).write(2,time(17,1))ws.row(4).set_cell_date(0,date(2009,3,18))ws.row(4).set_cell_date(1,datetime(2009,3,18,17,0,1))ws.row(4).set_cell_date(2,time(17,1))ws.row(5).write(0,False)ws.row(5).write(1,True)ws.row(6).set_cell_boolean(0,False)ws.row(6).set_cell_boolean(1,True)ws.row(7).set_cell_error(0,0x17)ws.row(7).set_cell_error(1,'#NULL!')ws.row(8).write(0,'',Style.easyxf('pattern: pattern solid, fore_colour green;'))ws.row(8).write(1,None,Style.easyxf('pattern: pattern solid, fore_colour blue;'))ws.row(9).set_cell_blank(0,Style.easyxf('pattern: pattern solid, fore_colour yellow;'))ws.row(10).set_cell_mulblanks(5,10,Style.easyxf('pattern: pattern solid, fore_colour red;'))wb.save('types.xls')
样式
from datetime import datefrom xlwt import Workbook, XFStyle, Borders, Pattern, Fontfnt = Font() #字体fnt.name = 'Arial'borders = Borders()borders.left = Borders.THICKborders.right = Borders.THICKborders.top = Borders.THICKborders.bottom = Borders.THICKpattern = Pattern()pattern.pattern = Pattern.SOLID_PATTERNpattern.pattern_fore_colour = 0x0Astyle = XFStyle() #样式style.num_format_str='YYYY-MM-DD'style.font = fntstyle.borders = bordersstyle.pattern = patternbook = Workbook()sheet = book.add_sheet('A Date')sheet.write(1,1,date(2009,3,18),style)book.save('date.xls')
更简单的设置样式的方式
from datetime import datefrom xlwt import Workbook, easyxfbook = Workbook()sheet = book.add_sheet('A Date')sheet.write(1,1,date(2009,3,18),easyxf( 'font: name Arial;' 'borders: left thick, right thick, top thick, bottom thick;' 'pattern: pattern solid, fore_colour red;', num_format_str='YYYY-MM-DD' ))book.save('date.xls')
字体
主要属性
- bold
- charset
- colur
- escapement
- family
- height
- italic
- name
- outline
- shadow
- struc_out
- underline
- color_index
- color
aligment
orders
pattern
公式
from xlwt import Workbook, Formulabook = Workbook()sheet1 = book.add_sheet('Sheet 1')sheet1.write(0,0,10)sheet1.write(0,1,20)sheet1.write(1,0,Formula('A1/B1')) #公式sheet2 = book.add_sheet('Sheet 2')row = sheet2.row(0)row.write(0,Formula('sum(1,2,3)'))row.write(1,Formula('SuM(1;2;3)'))row.write(2,Formula("$A$1+$B$1*SUM('ShEEt 1'!$A$1:$b$2)"))book.save('formula.xls')
超链接
from xlwt import Workbook,easyxf,Formulastyle = easyxf('font: underline single')book = Workbook()sheet = book.add_sheet('Hyperlinks')sheet.write( 0, 0, Formula('HYPERLINK("http://www.python.org";"Python")'), style)link = 'HYPERLINK("mailto:python-excel@googlegroups.com";"help")'sheet.write( 1,0, Formula(link), style)book.save("hyperlinks.xls")
图片
from xlwt import Workbookw = Workbook()ws = w.add_sheet('Image')ws.insert_bitmap('python.bmp', 0, 0)w.save('images.xls')images.py
合并单元格
from xlwt import Workbook,easyxfstyle = easyxf( 'pattern: pattern solid, fore_colour red;' 'align: vertical center, horizontal center;' )w = Workbook()ws = w.add_sheet('Merged')ws.write_merge(1,5,1,5,'Merged',style)w.save('merged.xls')
边框
from xlwt import Workbook,easyxftl = easyxf('border: left thick, top thick')t = easyxf('border: top thick')tr = easyxf('border: right thick, top thick')r = easyxf('border: right thick')br = easyxf('border: right thick, bottom thick')b = easyxf('border: bottom thick')bl = easyxf('border: left thick, bottom thick')l = easyxf('border: left thick')w = Workbook()ws = w.add_sheet('Border')ws.write(1,1,style=tl)ws.write(1,2,style=t)ws.write(1,3,style=tr)ws.write(2,3,style=r)ws.write(3,3,style=br)ws.write(3,2,style=b)ws.write(3,1,style=bl)ws.write(2,1,style=l)w.save('borders.xls')
拆分和冻结
Split and Freeze panesfrom xlwt import Workbookfrom xlwt.Utils import rowcol_to_cellw = Workbook()sheet = w.add_sheet('Freeze')sheet.panes_frozen = Truesheet.remove_splits = Truesheet.vert_split_pos = 2sheet.horz_split_pos = 10sheet.vert_split_first_visible = 5sheet.horz_split_first_visible = 40for col in range(20): for row in range(80): sheet.write(row,col,rowcol_to_cell(row,col))w.save('panes.xls')
概要
from xlwt import Workbookdata = [ ['','','2008','','2009'], ['','','Jan','Feb','Jan','Feb'], ['Company X'], ['','Division A'], ['','',100,200,300,400], ['','Division B'], ['','',100,99,98,50], ['Company Y'], ['','Division A'], ['','',100,100,100,100], ['','Division B'], ['','',100,101,102,103], ]w = Workbook()ws = w.add_sheet('Outlines')for i,row in enumerate(data): for j,cell in enumerate(row): ws.write(i,j,cell)ws.row(2).level = 1ws.row(3).level = 2ws.row(4).level = 3ws.row(5).level = 2ws.row(6).level = 3ws.row(7).level = 1ws.row(8).level = 2ws.row(9).level = 3ws.row(10).level = 2ws.row(11).level = 3ws.col(2).level = 1ws.col(3).level = 2ws.col(4).level = 1ws.col(5).level = 2w.save('outlines.xls')
缩放比例
from xlwt import Workbookw = Workbook()ws = w.add_sheet('Normal')ws.write(0,0,'Some text')ws.normal_magn = 75ws = w.add_sheet('Page Break Preview')ws.write(0,0,'Some text')ws.preview_magn = 150ws.page_preview = Truew.save('zoom.xls')
案例实战
以下是可以尝试的任务列表。
- 在表格中插入一行,从现有Excel文件开始,尝试创建一个在给定位置插入行的新Excel文件。
- 将工作簿拆分成表格,从现有Excel文件开始,为原始文件中的每个工作表创建一个包含一个文件的目录。
- 报告Excel文件中的完整目录中的错误,扫描Excel文件的目录并报告任何错误单元的位置。
- 从现有Excel文件开始,创建一个生成新Excel文件的过滤过程,该文件排除包含错误单元格的任何行。生成一个新的Excel文件,其中包含原始文件中存在错误的空单元格。
- 从数据库生成报告,此任务是采用典型的数据库查询并将其转储到Excel文件中,以便在冻结窗格中以良好的对齐方式很好地设置标题行。