标签:11 27 -- pred write source json train 2021
2021SC@SDUSC
本次分析prepare/generate_prepare模块
首先是初始化部分:从命令行参数(sys.argv[2]、sys.argv[3]、sys.argv[4])中依次读取训练集、开发集和测试集的batch数量。
# Batch counts for the three data splits, taken from the command line.
# Positional arguments 2, 3 and 4 are converted with int() when the module is
# executed, so a missing argument raises IndexError and a non-integer value
# raises ValueError before any function in this file can run.
TRAIN_NUM_BATCHES = int(sys.argv[2])  # number of training batches
DEV_NUM_BATCHES = int(sys.argv[3])  # number of development batches
TEST_NUM_BATCHES = int(sys.argv[4])  # number of test batches
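generate_bash函数用来生成批量运行extract_property.py的shell脚本:训练集5个(cmd_extract_train1.sh~cmd_extract_train5.sh,每个脚本处理10个batch,共50个batch),开发集1个(cmd_extract_dev.sh),测试集1个(cmd_extract_test.sh)。脚本中的每条命令都以 & 结尾放到后台执行,最后用wait等待全部命令结束。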
def generate_bash():
    """Write the shell scripts that launch ``extract_property.py`` batch by batch.

    Seven scripts are created in the current working directory:

    * ``cmd_extract_train1.sh`` .. ``cmd_extract_train5.sh`` -- ten training
      batches each, covering batches 1-50.  This count is fixed here and is
      not derived from ``TRAIN_NUM_BATCHES``.
    * ``cmd_extract_dev.sh`` -- batches 1 .. ``DEV_NUM_BATCHES``.
    * ``cmd_extract_test.sh`` -- batches 1 .. ``TEST_NUM_BATCHES``.

    Each generated command line ends with ``&`` and every script is closed
    with a final ``wait`` (written without a trailing newline).  The value
    passed as ``--concept_seed`` is read from ``sys.argv[5]``.
    """
    dataset = "./amr_data/amr_2.0/csqa"
    concept_seed = sys.argv[5]  # question_amr, question_token

    # One command template per split; the placeholders are
    # (dataset, dataset, batch number, concept seed).
    train_command = "python3 extract_property.py --train_data %s/train.pred.txt --amr_files %s/train.pred_%d.txt --nprocessors 2 --concept_seed %s &\n"
    dev_command = "python3 extract_property.py --train_data %s/train.pred.txt --amr_files %s/dev.pred_%d.txt --nprocessors 1 --concept_seed %s &\n"
    test_command = "python3 extract_property.py --train_data %s/train.pred.txt --amr_files %s/test.pred_%d.txt --nprocessors 1 --concept_seed %s &\n"

    # (script file, first batch handled by that script); each script takes
    # ten consecutive batches.
    train_scripts = (
        ("cmd_extract_train1.sh", 1),
        ("cmd_extract_train2.sh", 11),
        ("cmd_extract_train3.sh", 21),
        ("cmd_extract_train4.sh", 31),
        ("cmd_extract_train5.sh", 41),
    )
    for script_name, first_batch in train_scripts:
        with open(script_name, 'w') as script:
            for batch in range(first_batch, first_batch + 10):
                script.write(train_command % (dataset, dataset, batch, concept_seed))
            script.write('wait')

    with open("cmd_extract_dev.sh", 'w') as script:
        script.writelines(
            dev_command % (dataset, dataset, batch, concept_seed)
            for batch in range(1, DEV_NUM_BATCHES + 1)
        )
        script.write('wait')

    with open("cmd_extract_test.sh", 'w') as script:
        script.writelines(
            test_command % (dataset, dataset, batch, concept_seed)
            for batch in range(1, TEST_NUM_BATCHES + 1)
        )
        script.write('wait')
函数copy_files的功能是把源文件的内容复制到目标文件,并对常见的异常情况(源文件与目标文件是同一个文件、目标是目录、权限不足以及其他错误)分别打印提示信息。
def copy_files(source, destination):
    """Copy the contents of ``source`` to ``destination`` and report the outcome.

    The copy is best-effort: failures are reported with a message on stdout
    instead of being raised to the caller.

    Args:
        source: Path of the file to read.
        destination: Path of the file to write.

    Returns:
        None.
    """
    try:
        shutil.copyfile(source, destination)
    # Source and destination are the same file.  This handler must stay first
    # so the more general handlers below do not pick the error up instead.
    except shutil.SameFileError:
        print("Source and destination represents the same file.")
    # Destination is a directory.
    except IsADirectoryError:
        print("Destination is a directory.")
    # Any permission issue.
    except PermissionError:
        print("Permission denied.")
    # Every other ordinary error.  ``Exception`` rather than a bare ``except``
    # so that KeyboardInterrupt and SystemExit still reach the caller.
    except Exception:
        print("Error occurred while copying file.")
    else:
        print("File copied successfully.")
函数combine把各个batch的JSON结果文件合并成一个JSON数组文件,并根据sys.argv[5]的取值分开发(dev)、测试(test)、训练(其余取值)三种情况分别处理。
def combine():
    """Merge the per-batch JSON result files of one split into one JSON array.

    The split is selected by ``sys.argv[5]``: ``'dev'`` and ``'test'`` pick
    the corresponding split, any other value is treated as the training
    split.  The number of batches comes from ``DEV_NUM_BATCHES``,
    ``TEST_NUM_BATCHES`` or ``TRAIN_NUM_BATCHES`` respectively.
    """
    mode = sys.argv[5]
    PATH = '/mnt/cn_data/amr_2.0/csqa/'
    if mode == 'dev':
        _combine_split(PATH, 'dev', DEV_NUM_BATCHES)
    elif mode == 'test':
        _combine_split(PATH, 'test', TEST_NUM_BATCHES)
    else:
        _combine_split(PATH, 'train', TRAIN_NUM_BATCHES)


def _combine_split(path, split, num_batches):
    """Concatenate the batch files of ``split`` into a single JSON array file.

    Reads ``<split>.pred_<i>_cn_extended_final.json`` for ``i`` in
    ``1..num_batches`` and writes every top-level array element to
    ``<split>_pred_cn_extended_real_final.json``.  Before the array is
    closed, the unterminated file is copied to
    ``<split>_pred_cn_extended_real_final_original.json``.

    Args:
        path: Directory prefix (including the trailing slash) of all files.
        split: Split name used in the file names (``'dev'``, ``'test'`` or
            ``'train'``).
        num_batches: Number of batch files to merge.
    """
    source = path + "%s_pred_cn_extended_real_final.json" % split
    wrote_item = False
    with open(source, 'w') as fj:
        fj.write('[')
        for batch in range(1, num_batches + 1):
            print('i th batch', batch)
            batch_file = path + "%s.pred_%d_cn_extended_final.json" % (split, batch)
            # try_parse is defined elsewhere in the file -- presumably a
            # validity check of the batch file; confirm against its definition.
            try_parse(batch_file)
            print('done_parsing')
            with open(batch_file, 'rb') as fp:
                # Stream the elements so a batch never has to be loaded whole.
                # NOTE(review): by default ijson yields Decimal for
                # non-integer numbers, which json.dump cannot serialise --
                # confirm the batch files contain none.
                for item in ijson.items(fp, 'item'):
                    json.dump(item, fj)
                    fj.write(' ,')
                    wrote_item = True

    # Keep a copy of the file as written so far (trailing separator included,
    # closing bracket not yet added).
    destination = source[:source.index('final.json')] + 'final_original.json'
    copy_files(source, destination)

    # Drop the trailing ',' left after the last element.  With no elements
    # the last byte is the opening '[', which has to stay so the result is
    # the valid empty array "[]".
    if wrote_item:
        with open(source, 'rb+') as fj_filehandle:
            fj_filehandle.seek(-1, os.SEEK_END)
            fj_filehandle.truncate()
    with open(source, 'a') as fj:
        fj.write("]")
标签:11,27,--,pred,write,source,json,train,2021 来源: https://blog.csdn.net/m0_59850509/article/details/122139140
本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享; 2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关; 3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关; 4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除; 5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。