标签:xlsx aborted paths 10053 Connection file time path dir
# -*- coding: utf-8 -*-
# @Time : 2022/1/11 13:36
import os
import requests
from pathlib import Path
import time
def mkdir(path):
    """Create the directory ``path``, including missing parents, if it is absent.

    :param path: directory to create (a string or path-like object).
    :return: None
    :raises FileExistsError: if ``path`` already exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check: a directory that
    # appears between "check" and "create" no longer makes makedirs fail.
    os.makedirs(path, exist_ok=True)
def get_excel(dir_paths, url="http://192.168.1.31:56782/table_extract/"):
    """Upload every PDF in a directory to the table-extraction service.

    Each file directly inside ``dir_paths`` whose suffix is ``.pdf`` (matched
    case-insensitively) is POSTed to ``url`` as a multipart upload, and the
    response body is written to ``<dir_paths>/excel_result/<pdf name>.xlsx``.
    Progress and per-file duration are printed to stdout.

    :param dir_paths: directory that contains the PDF files.
    :param url: endpoint of the extraction service. Keep the trailing slash;
        the author's note in this file reports a failure caused by a
        missing "/" in the URL.
    :return: None
    :raises requests.HTTPError: if the service answers with a 4xx/5xx status,
        so an error page is never saved as an ``.xlsx`` file.
    """
    dir_path = Path(dir_paths)
    # Collect the PDF files; directories that merely end in ".pdf" are skipped
    # because they cannot be opened for upload.
    file_paths = [
        candidate
        for candidate in dir_path.glob('*.*')
        if candidate.is_file() and candidate.suffix.lower() == '.pdf'
    ]
    if not file_paths:
        # Nothing to convert: do not create the output directory.
        return

    # Loop-invariant setup: output directory and request headers.
    xlsx_file_dir = dir_path.joinpath('excel_result')
    mkdir(xlsx_file_dir)
    headers = {'Connection': 'close'}
    total = len(file_paths)

    # Call the service once per file.
    for file_number, file_path in enumerate(file_paths, start=1):
        print("第{}份文件开始==========================================".format(file_number))
        print(' [{0} / {1}] 服务开始 {2}...'.format(file_number, total, file_path))
        start_time = time.time()

        # Both the PDF handle and the HTTP session are released as soon as the
        # upload finishes, even if the request raises.
        with open(str(file_path), 'rb') as pdf_file, requests.Session() as session:
            files = [('file', (file_path.name, pdf_file, 'application/pdf'))]
            response = session.post(url, headers=headers, files=files, timeout=600)
        response.raise_for_status()

        # Store the reply next to the sources, under excel_result/.
        xlsx_file = xlsx_file_dir.joinpath(file_path.with_suffix('.xlsx').name)
        with open(str(xlsx_file), 'wb') as f:
            f.write(response.content)

        duration_sec = time.time() - start_time
        print(' [{0} / {1}] 服务结束 {2} with duration: {3} minutes.'.format(file_number, total, file_path, int(duration_sec / 60)))
        print(" 第{}份文件结束!!!用时{}min".format(file_number, int(duration_sec / 60)))
if __name__ == '__main__':
    # Convert every PDF in the source folder and report the total wall-clock
    # time in minutes (whole seconds divided by 60, rounded to 2 decimals).
    began_at = time.time()
    get_excel(r'F:\财富趋势')
    elapsed_seconds = int(time.time() - began_at)
    print("总计用时:{}min".format(round(elapsed_seconds / 60, 2)))
备注:之前 url 没写对,末尾少了一个 "/"(正确写法见上面代码中的 url),排查了很久,在此记录一下。
标签:xlsx,aborted,paths,10053,Connection,file,time,path,dir 来源: https://blog.csdn.net/weixin_46392229/article/details/122437273
本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享; 2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关; 3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关; 4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除; 5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。