ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

python-excel处理

2021-03-21 20:59:56  阅读:181  来源: 互联网

标签:index python worksheet excel list cell 处理 workbook output


'''
要确定excel表格中的工作簿中工作表的数量、名称和每个工作表中行列的数量
open_workbook 模块用来读取和分析Excel文件
'''
import sys
from xlrd import open_workbook
input_file=sys.argv[1]
workbook=open_workbook(input_file)
print("Number of worksheet:",workbook.nsheets)
for worksheet in workbook.sheets():
    print('Worksheet name:',worksheet.name,'  number of rows in worksheet:',worksheet.nrows,'  number of cols in worksheet',worksheet.ncols)


'''
处理单个工作表
读写Excel文件
worksheet.cell_value();返回数据
'''
import sys
from xlrd import open_workbook
from xlwt import Workbook
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy1')
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    for row_index in range(worksheet.nrows):
        for column_index in range(worksheet.ncols):
            output_worksheet.write(row_index,column_index,worksheet.cell_value(row_index,column_index))
output_workbook.save(output_file)


'''
处理单个工作表
读写Excel文件-格式化数据
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy2')
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    for row_index in range(worksheet.nrows):
        row_list_output=[]
        for column_index in range(worksheet.ncols):
            if worksheet.cell_type(row_index,column_index)==3:
                date_cell=xldate_as_tuple(worksheet.cell_value(row_index,column_index),workbook.datemode)
                date_cell=date(*date_cell[0:3]).strftime('%m-%d-%Y')
                row_list_output.append(date_cell)
                output_worksheet.write(row_index,column_index,date_cell)
            else:
                non_date_cell=worksheet.cell_value(row_index,column_index)
                row_list_output.append(non_date_cell)
                output_worksheet.write(row_index,column_index,non_date_cell)
output_workbook.save(output_file)


'''
筛选特定行-行中的值满足某个条件
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy3')
sales_amount_index=3
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[]
    header=worksheet.row_values(0)
    data.append(header)
    for row_index in range(1,worksheet.nrows):
        row_list=[]
        sales_amount=worksheet.cell_value(row_index,sales_amount_index)
        if sales_amount>1400.0:
            for column_index in range(worksheet.ncols):
                cell_value=worksheet.cell_value(row_index,column_index)
                cell_type=worksheet.cell_type(row_index,column_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
筛选特定行-行中的值属于某个集合
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy4')
important_dates=['01/24/2013', '01/31/2013']
purchase_date_index=4
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    heade=worksheet.row_values(0)
    data=[]
    data.append(heade)
    for row_index in range(1,worksheet.nrows):
        purchase_datetime=xldate_as_tuple(worksheet.cell_value(row_index,purchase_date_index),workbook.datemode)
        purchase_date=date(*purchase_datetime[0:3]).strftime('%m/%d/%Y')
        row_list=[]
        if purchase_date in important_dates:
            for column_index in range(worksheet.ncols):
                cell_value=worksheet.cell_value(row_index,column_index)
                cell_type=worksheet.cell_type(row_index,column_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%m/%d/%Y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
筛选特定行-行中的值属匹配于某个格式(正则表达式)
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
import re
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy5')
pattern=re.compile(r'(?P<my_pattern>^J.*)')
customer_name_column_index=1
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[]
    head=worksheet.row_values(0)
    data.append(head)
    for row_index in range(1,worksheet.nrows):
        row_list=[]
        if pattern.search(worksheet.cell_value(row_index,customer_name_column_index)):
            for column_index in range(worksheet.ncols):
                cell_value=worksheet.cell_value(row_index,column_index)
                cell_type=worksheet.cell_type(row_index,column_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for elemnt_list,element in enumerate(output_list):
            output_worksheet.write(list_index,elemnt_list,element)
output_workbook.save(output_file)


'''
选取特定的列-列索引值
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy6')
want_column=[1,4]
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[]
    for row_index in range(worksheet.nrows):
        row_list=[]
        for column_index in want_column:
            cell_value=worksheet.cell_value(row_index,column_index)
            cell_type=worksheet.cell_type(row_index,column_index)
            if cell_type==3:
                date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                row_list.append(date_cell)
            else:
                row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
选取特定的列-列标题
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
putput_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy7')
want_columns=['Customer ID', 'Purchase Date']
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[want_columns]
    header_list=worksheet.row_values(0)
    header_index_list=[]
    for header_index in range(len(header_list)):
        if header_list[header_index] in want_columns:
            header_index_list.append(header_index)
    for row_index in range(1,worksheet.nrows):
        row_list=[]
        for column_index in header_index_list:
            cell_value=worksheet.cell_value(row_index,column_index)
            cell_type=worksheet.cell_type(row_index,column_index)
            if cell_type==3:
                date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                row_list.append(date_cell)
            else:
                row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for index_list,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(index_list,element_list,element)
output_workbook.save(putput_file)


'''
在所有工作表中筛选特定行
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_wroksheet=output_workbook.add_sheet('copy8')
sales_index=3
threshold=2000.0
first_worksheet=True
with open_workbook(input_file) as workbook:
    data=[]
    for worksheet in workbook.sheets():
        if first_worksheet:
            heade=worksheet.row_values(0)
            data.append(heade)
            first_worksheet=False
        for row_index in range(1,worksheet.nrows):
            row_list=[]
            sale_amount=worksheet.cell_value(row_index,sales_index)
            if sale_amount>threshold:
                for column_index in range(worksheet.ncols):
                    cell_value = worksheet.cell_value(row_index, column_index)
                    cell_type=worksheet.cell_type(row_index,column_index)
                    if cell_type==3:
                        date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                        date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                        row_list.append(date_cell)
                    else:
                        row_list.append(cell_value)
            if row_list:
                data.append(row_list)
    for index_list,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_wroksheet.write(index_list,element_list,element)
output_workbook.save(output_file)


'''
在所有工作表中选定特定的列
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy9')
want_columns=['Customer Name', 'Sale Amount']
first_worksheet=True
with open_workbook(input_file) as workbook:
    data=[want_columns]
    index_of_cols_to_keep=[]
    for worksheet in workbook.sheets():
        if first_worksheet:
            header=worksheet.row_values(0)
            for columns_index in range(len(header)):
                if header[columns_index] in want_columns:
                    index_of_cols_to_keep.append(columns_index)
            first_worksheet=False
        for row_index in range(1,worksheet.nrows):
            row_list=[]
            for columns_index in index_of_cols_to_keep:
                cell_value=worksheet.cell_value(row_index,columns_index)
                cell_type=worksheet.cell_type(row_index,columns_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%d-%m-%y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
            if row_list:
                data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
在excel工作簿中读取一组工作表
在一组工作表中筛选特定行
从第一个和第二个工作表中筛选出销售额大于 $1900.00 的那些行
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy10')
threshold=1900.0
want_sheet=[0,1]
want_columns_index=3
first_worksheet=True
with open_workbook(input_file) as workbook:
    data=[]
    for sheet_index in range(workbook.nsheets):
        if sheet_index in want_sheet:
            worksheet=workbook.sheet_by_index(sheet_index)
            if first_worksheet:
                header_row=worksheet.row_values(0)
                data.append(header_row)
                first_worksheet=False
            for row_index in range(1,worksheet.nrows):
                row_list=[]
                sale_amount=worksheet.cell_value(row_index,want_columns_index)
                if sale_amount>threshold:
                    for column_index in range(worksheet.ncols):
                        cell_value=worksheet.cell_value(row_index,column_index)
                        cell_type=worksheet.cell_type(row_index,column_index)
                        if cell_type==3:
                            date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                            date_cell=date(*date_cell[0:3]).strftime('%d-%m-%y')
                            row_list.append(date_cell)
                        else:
                            row_list.append(cell_value)
                if row_list:
                    data.append(row_list)
    for list_index ,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
处理多个工作簿
工作表计数以及每个工作表中的行列计数
想知道一个文件夹中工作簿的数量,每个工作簿中工作表的数量,以及每个工作表中行与列的数量
'''
import glob
import sys
import os
from xlrd import open_workbook
input_directionary=sys.argv[1]
workbook_counter=0
for input_file in glob.glob(os.path.join(input_directionary,'*.xls*')):
    workbook=open_workbook(input_file)
    print('Workbook Name: %s ' % os.path.basename(input_file))
    print('Number of worksheet: %d' % workbook.nsheets)
    for worksheet in workbook.sheets():
        print('Worksheet Name:',worksheet.name,' worksheet rows: ',worksheet.nrows,' worksheet cols:',worksheet.ncols)
    workbook_counter=workbook_counter+1
print('Number of workbook: %d' %(workbook_counter))


'''
从多个工作簿中连接数据
将多个工作簿中所有工作表的数据垂直连接成一个输出文件
'''
import glob
import os
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_directionary=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy11')
data=[]
first_worksheet=True
for input_file in glob.glob(os.path.join(input_directionary,'*.xls*')):
    print('Workbook Name:',os.path.basename(input_file))
    with open_workbook(input_file) as workbook:
        for worksheet in workbook.sheets():
            if first_worksheet:
                header_row=worksheet.row_values(0)
                data.append(header_row)
                first_worksheet=False
            for row_index in range(1,worksheet.nrows):
                row_list=[]
                for column_index in range(worksheet.ncols):
                    ceLl_value=worksheet.cell_value(row_index,column_index)
                    ceLl_type=worksheet.cell_type(row_index,column_index)
                    if ceLl_type==3:
                        date_cell=xldate_as_tuple(ceLl_value,workbook.datemode)
                        date_cell=date(*date_cell[0:3]).strftime('%d-%m-%y')
                        row_list.append(date_cell)
                    else:
                        row_list.append(ceLl_value)
                if row_list:
                    data.append(row_list)
for list_index ,output_list in enumerate(data):
    for element_list,element in enumerate(output_list):
        output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
为每个工作簿和工作表计算总数和均值
为多个工作簿计算工作表级别和工作簿级别的统计量
'''
import glob
import os
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
input_floder=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worsheet=output_workbook.add_sheet('sums_and_avergae')
all_data=[]
sales_amount_index=3
header=['workbook', 'worksheet', 'worksheet_total','worksheet_average','workbook_total','workbook_average']
all_data.append(header)
for input_file in glob.glob(os.path.join(input_floder,'*.xls*')):
    with open_workbook(input_file) as workbook:
        list_of_totals=[]
        list_of_numbers=[]
        workbook_output=[]
        for worksheet in workbook.sheets():
            total_sales=0
            number_of_sales=0
            worksheet_list=[]
            worksheet_list.append(os.path.basename(input_file))
            worksheet_list.append(worksheet.name)
            for row_index in range(1,worksheet.nrows):
                try:
                    total_sales=total_sales+float(str(worksheet.cell_value(row_index,sales_amount_index)).strip('$').replace(',',''))
                    number_of_sales=number_of_sales+1
                except:
                    total_sales=total_sales+0
                    number_of_sales=number_of_sales+0
            average_sales='%.2f' % (total_sales/number_of_sales)
            worksheet_list.append(average_sales)
            worksheet_list.append(float(average_sales))
            list_of_totals.append(total_sales)
            list_of_numbers.append(float(number_of_sales))
            workbook_output.append(worksheet_list)
        workbook_total=sum(workbook_total)
        workbook_average=sum(list_of_totals)/sum(list_of_numbers)
        for list_element in workbook_output:
            list_element.append(worksheet_list)
            list_element.append(workbook_average)
        all_data.extend(workbook_output)
for list_index,output_list in enumerate(all_data):
    for element_list,element in enumerate(output_list):
        output_worsheet.write(list_index,element_list,element)
output_workbook.save(output_file)

标签:index,python,worksheet,excel,list,cell,处理,workbook,output
来源: https://blog.csdn.net/LiYeye1234567890/article/details/115055763

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有