ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

2021-11-27

2021-12-25 09:06:30  阅读:132  来源: 互联网

标签:11 27 -- pred write source json train 2021


 2021SC@SDUSC

本次分析prepare/generate_prepare模块

初始化部分:从命令行参数中读取训练集、开发集和测试集各自的batch数量。

# Batch counts for each data split, read from the command line at import time.
# int() raises ValueError if an argument is not an integer, and indexing
# sys.argv raises IndexError if the argument is missing.

# Number of train batches (2nd command-line argument).
TRAIN_NUM_BATCHES = int(sys.argv[2])

# Number of dev batches (3rd command-line argument).
DEV_NUM_BATCHES = int(sys.argv[3])

# Number of test batches (4th command-line argument).
TEST_NUM_BATCHES = int(sys.argv[4])

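generate_bash函数生成7个shell脚本:训练集5个(cmd_extract_train1.sh~cmd_extract_train5.sh,每个脚本处理10个batch,共覆盖第1~50个batch),开发集1个,测试集1个。每个脚本把各batch对应的extract_property.py命令放到后台并行执行,最后用wait等待全部完成。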

def generate_bash():
    """Write the shell scripts that launch ``extract_property.py`` per batch.

    Seven scripts are created in the current working directory:

    * ``cmd_extract_train1.sh`` .. ``cmd_extract_train5.sh`` -- ten train
      batches each (batch ids 1-50 in total), ``--nprocessors 2``.  These
      ranges are fixed and do not depend on ``TRAIN_NUM_BATCHES``.
    * ``cmd_extract_dev.sh`` -- batches ``1..DEV_NUM_BATCHES``,
      ``--nprocessors 1``.
    * ``cmd_extract_test.sh`` -- batches ``1..TEST_NUM_BATCHES``,
      ``--nprocessors 1``.

    Every command line ends with ``&`` so the batches run in the background,
    and each script finishes with ``wait``.  The concept seed is taken from
    the 5th command-line argument.
    """
    dataset = "./amr_data/amr_2.0/csqa"
    concept_seed = sys.argv[5]  # question_amr, question_token

    def emit(script, split, nprocessors, batch_ids):
        # One backgrounded extraction command per batch, then a final `wait`.
        for batch_id in batch_ids:
            script.write(
                "python3 extract_property.py --train_data %s/train.pred.txt "
                "--amr_files %s/%s.pred_%d.txt --nprocessors %d "
                "--concept_seed %s &\n"
                % (dataset, dataset, split, batch_id, nprocessors, concept_seed)
            )
        script.write('wait')

    # Train batches are spread over five scripts of ten batches each.
    for part in range(1, 6):
        with open("cmd_extract_train%d.sh" % part, 'w') as script:
            last_id = 10 * part
            emit(script, "train", 2, range(last_id - 9, last_id + 1))

    with open("cmd_extract_dev.sh", 'w') as script:
        emit(script, "dev", 1, range(1, DEV_NUM_BATCHES + 1))

    with open("cmd_extract_test.sh", 'w') as script:
        emit(script, "test", 1, range(1, TEST_NUM_BATCHES + 1))

函数copy_files调用shutil.copyfile把source文件的内容复制到destination,并对常见的异常情况(源与目标是同一文件、目标是目录、权限不足等)打印提示信息,而不是向上抛出异常。

def copy_files(source, destination):
    """Copy the contents of ``source`` to ``destination`` and report the result.

    The copy is done with ``shutil.copyfile``.  Failures are not raised to the
    caller; instead a short message describing the problem is printed.

    Args:
        source: Path of the file to copy.
        destination: Path of the file to write.

    Returns:
        None.  The outcome is only reported on standard output.
    """
    try:
        shutil.copyfile(source, destination)
    except shutil.SameFileError:
        # Source and destination refer to the same file.
        print("Source and destination represents the same file.")
    except IsADirectoryError:
        # Destination is a directory rather than a file path.
        print("Destination is a directory.")
    except PermissionError:
        print("Permission denied.")
    except Exception:
        # Any other ordinary error.  Deliberately not a bare ``except:`` so
        # that KeyboardInterrupt / SystemExit still propagate.
        print("Error occurred while copying file.")
    else:
        print("File copied successfully.")

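函数combine把各个batch生成的JSON文件(*.pred_N_cn_extended_final.json)中的条目依次合并,写入一个完整的JSON数组文件;根据命令行参数mode分为dev、test和其余(train)三种情况,三者的处理流程相同,只是文件名前缀和batch数量不同。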

def _combine_split(split, num_batches, path):
    """Merge the per-batch JSON arrays of one split into a single JSON array.

    Reads ``<path><split>.pred_<i>_cn_extended_final.json`` for
    ``i = 1..num_batches`` and writes every item to
    ``<path><split>_pred_cn_extended_real_final.json``.

    Args:
        split: File-name prefix of the split ('dev', 'test' or 'train').
        num_batches: Number of batch files to merge.
        path: Directory prefix (including the trailing separator).
    """
    source = path + "%s_pred_cn_extended_real_final.json" % split

    with open(source, 'w') as fj:
        fj.write('[')
        for batch in range(1, num_batches + 1):
            print('i th batch', batch)
            batch_file = path + "%s.pred_%d_cn_extended_final.json" % (split, batch)
            # try_parse is defined elsewhere in the file; presumably it
            # checks that the batch file is parseable -- TODO confirm.
            try_parse(batch_file)
            print('done_parsing')
            with open(batch_file, 'rb') as fp:
                # Stream the top-level array items instead of loading the
                # whole batch file into memory.
                for item in ijson.items(fp, 'item'):
                    json.dump(item, fj)
                    fj.write(' ,')

    # Keep a copy of the file as written so far (still unterminated, with the
    # trailing separator) before it is patched into a valid JSON array.
    destination = source[:source.index('final.json')] + 'final_original.json'
    copy_files(source, destination)

    with open(source, 'rb+') as fj_filehandle:
        # Drop the trailing ',' left after the last item.  Only do so when it
        # is really there: with zero items the last byte is '[' and removing
        # it would leave an invalid "]" file.
        fj_filehandle.seek(-1, os.SEEK_END)
        if fj_filehandle.read(1) == b',':
            fj_filehandle.seek(-1, os.SEEK_END)
            fj_filehandle.truncate()

    # Close the array; the context manager ensures the handle is closed.
    with open(source, 'a') as fj:
        fj.write("]")


def combine():
    """Merge the per-batch output files of one split into one JSON file.

    The split is selected by the 5th command-line argument: 'dev' and 'test'
    select those splits, any other value selects the train split.  The number
    of batch files merged is DEV_NUM_BATCHES, TEST_NUM_BATCHES or
    TRAIN_NUM_BATCHES respectively.
    """
    mode = sys.argv[5]
    PATH = '/mnt/cn_data/amr_2.0/csqa/'

    if mode == 'dev':
        _combine_split('dev', DEV_NUM_BATCHES, PATH)
    elif mode == 'test':
        _combine_split('test', TEST_NUM_BATCHES, PATH)
    else:
        _combine_split('train', TRAIN_NUM_BATCHES, PATH)

标签:11,27,--,pred,write,source,json,train,2021
来源: https://blog.csdn.net/m0_59850509/article/details/122139140

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有