ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

编译原理-词法分析 python实现

2022-07-02 12:01:35  阅读:197  来源: 互联网

标签:word scanner python list number sign 词法 编译 bit


上学的时候,老师让我们写实验。我看到那破旧的 VC++ 6.0,心生厌恶,于是申请改用 Python 来写。
老师同意了,那么就有了我的代码。
我分享出来,希望供大家参考。

本文词法分析的 Python 代码参考了 C 语言版本的实现: https://www.cnblogs.com/zyrblog/p/6885922.html

# 参考C语言代码 : https://www.cnblogs.com/zyrblog/p/6885922.html
import re
import copy

class Scanner(object):
    """Character classification and comment filtering for a small C lexer.

    Token categories used throughout this file:
    1 reserved word, 2 identifier, 3 unsigned integer, 4 operator,
    5 delimiter, 6 error.
    """

    # Reserved words -- category 1
    reserveWord = [
        "auto", "break", "case", "char", "const", "continue",
        "default", "do", "double", "else", "enum", "extern",
        "float", "for", "goto", "if", "int", "long",
        "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while"
    ]

    # Identifiers -- category 2
    # Unsigned integers -- category 3

    # Operators -- category 4
    operator = [
        "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
        "!=", "&", "&&", "|", "||", "%", "<<", ">>", "+="
    ]

    # Delimiters -- category 5
    Delimiter = [
        ";", "(", ")", "^", ",", "\"", "\'", "[", "]", "{", "}"
    ]

    # Errors -- category 6

    # ------------------------------------------------------------------ #

    def searchReserve(self, reserveWord):
        """Return True if ``reserveWord`` is a reserved word (category 1).

        False means the word is to be treated as an identifier.
        """
        return reserveWord in self.reserveWord

    def IsLetter(self, letter):
        """Return True if ``letter`` begins with an ASCII letter or ``_``."""
        return re.match(r'[a-zA-Z\_]', letter) is not None

    def IsDigit(self, digit):
        """Return True if ``digit`` begins with a decimal digit."""
        return re.match(r'\d', digit) is not None

    def IsSign(self, Sign):
        """Classify ``Sign`` as a delimiter or an operator.

        Returns:
            5 if ``Sign`` is a delimiter, 4 if it is an operator, -1 otherwise.
        """
        if Sign in self.Delimiter:
            return 5
        if Sign in self.operator:
            return 4
        return -1

    def filterResource(self, code):
        """Strip C comments and control whitespace from source lines.

        Removes ``// ...`` line comments and ``/* ... */`` block comments
        (block comments may span several lines), drops tab, newline,
        vertical-tab and carriage-return characters, and discards lines
        that end up empty. Spaces are kept because the lexer uses them as
        token separators.

        Args:
            code: list of source lines (strings).

        Returns:
            A new list holding the non-empty filtered lines.
        """
        # NOTE: comment markers inside string literals are not recognised
        # as literal text; a "//" inside a C string still ends the line.
        in_block_comment = False  # carried across lines for multi-line comments
        filtered = []
        for s_line in code:
            kept = []
            length = len(s_line)
            i = 0
            while i < length:
                pair = s_line[i:i + 2]
                if in_block_comment:
                    # Inside a comment only the terminator matters.
                    if pair == '*/':
                        in_block_comment = False
                        # Step over exactly the two terminator characters so
                        # the character right after the comment is kept.
                        i += 2
                    else:
                        i += 1
                    continue
                if pair == '//':
                    break  # rest of the line is a comment
                if pair == '/*':
                    in_block_comment = True
                    # Consume both opener characters so that "/*/" is not
                    # mistaken for a complete comment.
                    i += 2
                    continue
                if s_line[i] not in '\t\n\v\r':
                    kept.append(s_line[i])
                i += 1

            text = ''.join(kept)
            if text != '':
                filtered.append(text)

        return filtered


def clear_number(number_list):
    """Reset a ``[value, pending_flag]`` number buffer to zeros in place."""
    number_list[0] = 0
    number_list[1] = 0

def clear_sign(sign_list):
    """Reset a ``[sign_text, category]`` sign buffer to ``['', 0]`` in place."""
    sign_list[0], sign_list[1] = '', 0

def clear_word(word):
    """Return the empty string used to reset a word buffer.

    Strings are immutable, so rebinding the parameter inside this function
    cannot change the caller's variable. The cleared value is returned
    instead; callers reset their buffer with ``word = clear_word(word)``.

    Args:
        word: the current word buffer (its content is not used).

    Returns:
        The empty string.
    """
    return ''

# Main program.
# Read the C source file, strip its comments, then scan every remaining line
# character by character and print each recognised token as a
# ( category ," text ") tuple. Categories: 1 reserved word, 2 identifier,
# 3 unsigned integer, 4 operator, 5 delimiter, 6 error.
scanner = Scanner()
code = []
with open('D:/test.txt', 'r',encoding='UTF-8') as f: # read the input file
    for line in f.readlines():
        code.append(line.strip())

    # Filter out comments and control whitespace.
    code = scanner.filterResource(code)

    # Token recognition:
    # characters are either signs (operators/delimiters) or non-signs.
    # Two non-sign tokens must be separated by a space; a sign and a
    # non-sign need no separator. A token therefore ends at a space, at a
    # sign, or at the end of the line.
    #
    # NOTE: number_or_word is reset to 0 at the top of every iteration, so
    # the `number_or_word > 0` branch below is never taken: a '+' or '-'
    # directly followed by a digit is always folded into the number
    # (e.g. `a-1` yields the tokens `a` and `-1`).
    for line in code:
        word = ''
        number = 0
        number_e = 0

        sign = ''
        sign_e = 0
        number_list = [number, number_e]  # [value so far, 1 while a number is pending]
        sign_list = [sign, sign_e]        # [sign text so far, its category or 0]

        number_or_word = 0
        number_plus = 1  # multiplier (+1 / -1) applied to the pending number

        i = -1
        while i < len(line)-1:
            i=i+1
            bit = line[i]
            number_or_word = 0

            # Sign handling: a sign also terminates the word or number
            # that precedes it.
            if scanner.IsSign(bit) > 0 and sign_list[1] > 0 and scanner.IsSign(sign_list[0]+bit) < 0:
                # The pending sign plus this character is not itself a
                # sign: emit the pending sign and start a new one.
                print('(', sign_list[1], ',"', sign_list[0], '")')
                clear_sign(sign_list)

            if scanner.IsSign(bit) > 0: # accumulate the sign
                sign_list[0] = sign_list[0] + bit
                sign_list[1] = scanner.IsSign(bit)
            if scanner.IsSign(bit) > 0 and word != '':
                # word followed by a sign: emit the word
                if scanner.searchReserve(word) == True:
                    print('(', 1, ',"', word, '")')
                else:
                    print('(', 2, ',"', word, '")')
                word = ''
                number_or_word=0
            elif scanner.IsSign(bit) > 0 and number_list[1] != 0:
                # number followed by a sign: emit the number
                print('(', 3, ',"', number_plus*number_list[0], '")')
                clear_number(number_list)
                number_or_word = 0
                number_plus = 1


            # Word handling (token starting with a letter).
            if scanner.IsLetter(bit) and sign_list[1] > 0:
                # sign followed by a letter: emit the sign, then go on
                # with the word
                print('(', sign_list[1], ',"', sign_list[0], '")')
                clear_sign(sign_list)


            if scanner.IsLetter(bit) and number_list[1]==0:   # letter
                word = word + bit
                number_or_word = 1    # previous character belongs to a word
                continue
            elif word != '' and scanner.IsDigit(bit): # digit inside a word
                word = word + str(bit)
                continue
            elif word != '' and bit == ' ': # word terminated by a space
                if scanner.searchReserve(word) == True:
                    print('(', 1, ',"', word, '")')
                else:
                    print('(', 2, ',"', word, '")')
                word = ''
                continue


            # Number handling (token starting with a digit).
            if scanner.IsDigit(bit)==True and sign_list[1] > 0:
                # sign followed by a digit: decide whether the sign is an
                # ordinary sign or the +/- sign of the number
                if sign_list[0] not in ['+', '-']:  # not a plus/minus sign
                    print('(', sign_list[1], ',"', sign_list[0], '")')
                    clear_sign(sign_list)
                elif number_or_word > 0:# a number or word precedes: it is an operator
                    pass
                elif number_or_word == 0:# plus/minus sign: fold it into the number
                    number_plus = int(sign_list[0]+'1 ')
                    clear_sign(sign_list)

            if scanner.IsDigit(bit)==True and word == '':  # digit of a number
                number_list[1] = 1
                number_list[0] = number_list[0] * 10 + int(bit)
                number_or_word = 1  # previous character belongs to a number
            elif number_list[1] == 1 and scanner.IsLetter(bit):
                # digits immediately followed by a letter: error token
                print('Error : (', 6, ',"', str(number_list[0]) + bit, '")')
                clear_number(number_list)
                clear_sign(sign_list)
                word = ''
            elif number_list[1] == 1 and bit == ' ':
                # number terminated by a space: emit it
                print('(', 3, ',"', number_plus*number_list[0], '")')
                number_plus = 1
                clear_number(number_list)

            if bit == ' ' and sign_list[1] != 0:
                # sign terminated by a space: emit it and reset
                print('(', sign_list[1], ',"', sign_list[0], '")')
                clear_sign(sign_list)

        # End of line: flush whichever token is still pending. A trailing
        # word or number has no terminating space or sign, so it would
        # otherwise be lost; a sign is printed only if one is really pending.
        if word != '':
            if scanner.searchReserve(word) == True:
                print('(', 1, ',"', word, '")')
            else:
                print('(', 2, ',"', word, '")')
        if number_list[1] != 0:
            print('(', 3, ',"', number_plus*number_list[0], '")')
        if sign_list[1] != 0:
            print('(', sign_list[1], ',"', sign_list[0], '")')

标签:word,scanner,python,list,number,sign,词法,编译,bit
来源: https://www.cnblogs.com/amtop/p/16436985.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有