005、用python整理资料

2022-06-15 22:34:04 阅读：215 来源： 互联网

标签：words python content tk 005 整理 path main fm

005、用python整理资料：系统规划与管理师备考，常常需要整理考题、解析、答案。一、批量识别图片文字。问题描述：1、能识别微信截图文字；2、能批量识别指定文件夹下的图片文字；3、把识别出来的文字以当前日期保存到桌面上。安装aip：pip install baidu-aip。调用百度文字识别工具：baidu.py

 1 # *_* coding : UTF-8 *_*
 2 # 开发团队 ：乾龙科技
 3 # 开发人员 ：Administrator
 4 # 开放时间 ：2021/2/17 15:31
 5 # 文件名称 ：baidu.PY
 6 # 开发工具 ：PyCharm
 7 # 程序代码 ：
 8 # 检测键盘、截图
 9 # 检测键盘、截图
10 from aip import AipOcr
11 
12 """ 你的 APPID AK SK """
13 """注册百度文字识别后，获得下面的值"""
# Baidu OCR credentials. Replace the placeholders with the values issued
# after registering for the Baidu text-recognition service.
APP_ID = '*******'
API_KEY = '************'
SECRET_KEY = '*********************'

# Module-wide OCR client built from the credentials above.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
19 
20 """ 读取图片 """
21 
22 
def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*.

    Args:
        filePath: Path of the file to read; it is opened in binary mode.

    Returns:
        The complete file content as ``bytes``.
    """
    with open(filePath, mode='rb') as image_file:
        raw_bytes = image_file.read()
    return raw_bytes
26 
27 
def get_connect():
    """Recognise the text in the saved screenshot ``../../pdf/screen.png``.

    The recognition steps are the same as for any other image, so this
    delegates to ``get_connect_mul`` instead of repeating them.

    Returns:
        The recognised lines concatenated into a single string.
    """
    return get_connect_mul('../../pdf/screen.png')
40 
41 
def get_connect_mul(path):
    """Recognise the text in the image file at *path*.

    The image bytes are sent to the general text-recognition call of the
    module-level ``client`` and every recognised line is concatenated.

    Args:
        path: Path of the local image file to recognise.

    Returns:
        The recognised lines joined into one string, with no separator.

    Raises:
        RuntimeError: If the response carries no ``words_result`` entry,
            i.e. the service did not return recognised text. The full
            response is included in the message to aid diagnosis.
    """
    image = get_file_content(path)

    # General text recognition on the bytes of a local image.
    content = client.basicGeneral(image)
    words_result = content.get('words_result')
    if words_result is None:
        # Surface the service response instead of an opaque KeyError.
        raise RuntimeError(f'OCR request failed for {path!r}: {content!r}')

    return ''.join(words['words'] for words in words_result)
54 
55 
56 # PDF转word
57 
58 
if __name__ == '__main__':
    # Manual check: append the text recognised from the saved screenshot
    # to the test document.
    with open('../../doc/test.doc', 'a') as doc_file:
        doc_file.write(get_connect())

二、主程序：批量识别图片文字.py

  1 import tkinter as tk
  2 import tkinter.filedialog
  3 from email.header import decode_header
  4 import os
  5 import time
  6 import sys
  7 from PIL import ImageGrab
  8 from baidu import get_connect, get_connect_mul
  9 
 10 
 11 # 字符编码转换
 12 def decode_str(s):
 13     value, charset = decode_header(s)[0]
 14     if charset:
 15         value = value.decode(charset)
 16     return value
 17 
 18 
 19 # “退出”按钮，退出程序
 20 def exit_sys():
 21     sys.exit()
 22 
 23 
 24 # 选择识别的文件夹路径
 25 def select_s():
 26     # 获取选择文件夹路径
 27     foldername = tkinter.filedialog.askdirectory()
 28     # 如果选择了文件夹
 29     if foldername != '':
 30         # 删除en4中的最后一个字符
 31         # len_entry = len(en0.get())
 32         # en0.delete(len_entry - 1)
 33         # 删除en4中的所有字符
 34         en0.delete(0, tkinter.END)
 35         # 将选择的文件路径，插入到en4的第0个字符位置
 36         en0.insert(0, foldername)
 37 
 38 # 保存识别出来的文本内容为桌面文本文件
 39 def save():
 40     # 获取桌面路径
 41     path_three = os.path.join(os.path.expanduser("~"), 'Desktop')
 42     # 获取当前日期字符串
 43     time_now = time.strftime("%Y%m%d", time.localtime())
 44     # 以写方式打开以当前日期命名的文本文件，同时保存到桌面
 45     f = open(f'{path_three}/{time_now}文字识别.txt', 'w')
 46     # 读取tex_character文本框中的所有文字，
 47     # 将该字符串作为初参数交给文件对象的write方法保存到文件。
 48     # 1.0表示第一行，第一列
 49     f.write(tex_character.get(1.0, 'end'))
 50     f.close()
 51 
 52 # 识别通过微信截屏的图片文字
 53 def show_character():
 54     # 调用screen()函数，截屏并保存到专用文件
 55     screen()
 56     # 调用get_connect()函数，读取截屏图片文字并显示到tex_character文本框中
 57     tex_character.insert('end', f'{get_connect()}\n')
 58 
 59 
 60 
 61 # 识别指定文件夹下的多个图片中的文字
 62 def show_mpcharacter():
 63     # 获取指定文件路径
 64     path_one = f'{en0.get()}/'
 65     # 获取指定路径下的所有图片列表
 66     imgs = [path_one + i for i in os.listdir(path_one)]
 67     # 循环调用baidu模块的get_connect_mul()函数识别图片文字，
 68     # 同时把文字插入到tex_character文本框中
 69     for img in imgs:
 70         tex_character.insert('end', f'{get_connect_mul(img)}\n')
 71 
 72 # 截屏图片文字识别，识别单图片文字
 73 def screen():
 74     # 等待键盘事件（同时按下'ctrl+alt+a'三个键）
 75     # print('等待键盘同时按下alt+a三个键')
 76     # keyboard.wait(hotkey='alt+a')
 77     # keyboard.wait(hotkey='enter')
 78     # 图片保存在剪切板里需要时间
 79     time.sleep(2)
 80     # 读取剪切板里面的图片到image
 81     if ImageGrab.grabclipboard():
 82         image = ImageGrab.grabclipboard()
 83         # 保存图片
 84         image.save('../../pdf/screen.png')
 85         # print(image)
 86 
 87 
 88 if __name__ == '__main__':
 89     fm_main = tk.Tk()
 90     fm_main.title("批量识别图片文字V1.0")
 91     # 获取显示区域的宽度
 92     screenWidth = fm_main.winfo_screenwidth()
 93     # 获取显示区域的高度
 94     screenHeight = fm_main.winfo_screenheight()
 95     # 设置窗口宽度
 96     width = 586
 97     # 设置窗口高度
 98     height = 400
 99     left = (screenWidth - width) / 2
100     top = (screenHeight - height) / 2
101     # 宽度x高度+x偏移+y偏移
102     # 在设定宽度和高度的基础上指定窗口相对于屏幕左上角的偏移位置
103     # fm_main.geometry('586x492')
104     fm_main.geometry('%dx%d+%d+%d' % (width, height, left, top))
105     fm_main.resizable(0, 0)  # 设置窗口大小不可变
106 
107     lb0 = tk.Label(fm_main, text="请选择需要文字识别的图片文件夹：")
108     lb0.grid(row=5, column=0, sticky=tk.W)
109     en0 = tk.Entry(fm_main)
110     en0.grid(row=5, column=1, sticky=tk.N + tk.E + tk.W)
111 
112     b0 = tk.Button(fm_main, text='...', command=select_s)
113     b0.grid(row=5, column=2, sticky=tk.N + tk.E + tk.W)
114 
115     b1 = tk.Button(fm_main, text='文件夹多图片文字识别', command=show_mpcharacter)
116     b1.grid(row=6, column=0, sticky=tk.N + tk.E + tk.W)
117 
118     b2 = tk.Button(fm_main, text='截屏图片文字识别', command=show_character)
119     b2.grid(row=6, column=1, sticky=tk.N + tk.E + tk.W)
120 
121     b3 = tk.Button(fm_main, text='保存文本', command=save)
122     b3.grid(row=8, column=0, sticky=tk.N + tk.E + tk.W)
123 
124     # columnspan=2，表示t1占用0,1二列
125     tex_character = tk.Text(fm_main)
126     tex_character.grid(row=7, column=0, columnspan=2, sticky=tk.N + tk.E + tk.W)
127 
128     # 创建滚动条
129     scroll = tk.Scrollbar(orient="vertical", command=tex_character.yview)
130 
131     # 将滚动条填充
132     tex_character.config(yscrollcommand=scroll.set)
133     scroll.grid(row=7, column=2, sticky=tk.S + tk.W + tk.E + tk.N)
134 
135     # 将滚动条与文本框关联
136     scroll['command'] = tex_character.yview
137     # scroll.config(command=t1.yview)
138     # 将滚动条填充
139     tex_character.config(yscrollcommand=scroll.set)
140 
141     b3 = tk.Button(fm_main, text='退出', command=exit_sys)
142     b3.grid(row=8, column=1, sticky=tk.N + tk.E + tk.W)
143 
144     fm_main.mainloop()

三、对识别出来的文本文件进行处理。

 1 import re
 2 import time
 3 import os
 4 
 5 # 获取桌面路径
 6 path_three = os.path.join(os.path.expanduser("~"), 'Desktop')
 7 # 获取当前日期字符串
 8 time_now = time.strftime("%Y%m%d", time.localtime())
 9 
10 # 打开桌面待处理文本文件。
11 # f = open(f'{path_three}/系统规划与管理师/001、信息系统综合知识.txt', 'r')
12 path_one = f'{path_three}/系统规划与管理师/'
13 # print(os.listdir(path_one))
14 for i in os.listdir(path_one):
15     f = open(path_one + i, 'r')
16     p = f.readlines()
17     for s in p:
18         s.strip()
19     # 用join()将列表p转换为字符串all_words
20     all_words = ''.join(p)
21     # 获取题目
22     question = re.findall('\d{1,3}、[\s\S]+?(?=解析：)', all_words)
23     # print(question)
24     for s in range(len(question)):
25         question[s] = question[s] + '\n'
26 
27     word_one = ''.join(question)
28     # 获取解析
29     # answer = re.findall('解析：[\s\S]+?(?=\d{1,3}、)', all_words)
30     answer = re.findall('解析：[\s\S]+?(?=\d{1,3}、|$)', all_words)
31     # print(answer)
32     # for s in answer:
33     #     s.strip()
34     for j in range(1, len(answer) + 1):
35         answer[j - 1] = f'{j}{answer[j - 1]}'
36     word_two = ''.join(answer)
37     print(word_one)
38     # print(word_two)
39     f.close()
40     f1 = open(f'{path_three}/系统规划与管理师（题目及答案）/{i}', 'w')
41     f1.write(word_one)
42     f1.close()
43     # 截取文件名称
44     k=i.split('.')
45     f2 = open(f'{path_three}/系统规划与管理师（题目及答案）/{k[0]}(答案).txt', 'w')
46     f2.write(f'{k[0]}(答案)\n{word_two}')
47     f2.close()

标签：words,python,content,tk,005,整理,path,main,fm
来源： https://www.cnblogs.com/lqsj2018/p/16380110.html

本站声明： 1. iCode9 技术分享网（下文简称本站）提供的所有内容，仅供技术学习、探讨和分享；
2. 关于本站的所有留言、评论、转载及引用，纯属内容发起人的个人观点，与本站观点和立场无关；
3. 关于本站的所有言论和文字，纯属内容发起人的个人观点，与本站观点和立场无关；
4. 本站文章均是网友提供，不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属；如您发现该文章侵犯了您的权益，可联系我们第一时间进行删除；
5. 本站为非盈利性的个人网站，所有内容不会用来进行牟利，也不会利用任何形式的广告来间接获益，纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

ICode9

005、用python整理资料