Github地址:https://github.com/Mr-Porridge/Highlight-Spirit
请点个star后大家随便使用
如遇到问题 随时提交issue或联系本人即可 谢谢!
| 分类号 | 分类内容 |
| :---: | :---: |
| 0 | 保留字 |
| 1 | 标识符 |
| 2 | 常量 |
| 3 | 运算符 |
| 4 | 分隔符 |
C++保留字49个:
cin,cout,return,extern,public,template,this,
if,else,while,signed,throw,union,this,
int,char,double,float,unsigned,const,goto,
include,for,long,short,virtual,sizeof,static,string,
struct,typedef,break,auto,void,stdio,class,
try,catch,default,false,true,virtual,delete,
friend,inline,namespace,new,operator,private,protected
C++额外保留字3个:
endl,printf, scanf,
标识符={以字母或'_'开头的,包含字母、下划线、数字的字符串}
常量={整数、浮点数、布尔型常量、字符型常量}
运算符={算术运算符、关系运算符、逻辑运算符、位运算符、赋值运算符、杂项运算符}
分隔符={':', ';', ',', '(', ')', '{', '}', '[', ']'}
【接着补】 未完待续
# 词法分析精灵
class spirit:
def __init__(self, raw_string: str):
    """Prepare the lexer state for one piece of source text.

    :param raw_string: the text to analyse; stored unchanged in ``self.raw``.
    """
    # Raw text that analyze() walks over.
    self.raw = raw_string

    # Keyword table. It starts empty and init_keywords() is called right
    # away; the values are currently hard-coded there (loading them from
    # JSON is a planned improvement according to the original note).
    self.keywords = []
    self.init_keywords()

    # Scratch buffer for the token currently being assembled.
    self.furnace = ''

    # Character classes used while scanning.
    self.letters = [*string.ascii_letters]  # a-z and A-Z
    self.separators = list(',;(){}[]')

    # Single characters that get special treatment:
    #   '_'  may start or continue an identifier (e.g. _hello)
    #   '-'  first half of '->'
    #   '#'  starts #include / #define style words
    #   '.'  decimal point / member access
    #   '/'  starts a comment
    self.underline, self.dash, self.sharp = '_', '-', '#'
    self.dot, self.left_slash = '.', '/'

    # Quote characters and the backslash that introduces an escape.
    self.single_quote, self.double_quote = "'", '"'
    self.slash = '\\'

    self.digits = [*string.digits]  # 0-9

    # Whitespace that is rendered rather than skipped.
    self.tab, self.enter = '\t', '\n'

    # Operators that are always emitted one character at a time
    # ('&&' and '||' are simply rendered as two single operators).
    self.single_op = list('%!^&|')
    # Operators that may be doubled or followed by '=' (e.g. '<=', '+=').
    self.double_op = list('+-=><*:')

    # Output list of token dicts.
    self.bottle = []
包含详细注释,如有问题可随时联系。
# 词法分析【不断扩充完善】
def analyze(self):
    """Split ``self.raw`` into tokens and append them to ``self.bottle``.

    Every token is stored as ``{"category": <str>, "value": <str>}``. The
    categories produced here are ``keyword``, ``sharpe-special``, ``word``,
    ``separator``, ``arrow-special``, ``double-op``, ``single-op``,
    ``number``, ``tab``, ``enter``, ``comment``, ``string`` and ``illegal``.
    Space characters are skipped without producing a token.

    ``self.bottle`` is only appended to, never cleared, so calling this
    method twice on the same instance accumulates tokens.

    Behaviour notes:

    * Every look-ahead and scanning loop is bounds-checked, so input that
      ends in the middle of a token (trailing identifier or number, lone
      ``-`` or ``/``, unterminated string or comment, ``//`` comment
      without a final newline) yields the text read so far instead of
      raising ``IndexError``.
    * The character following a two-character operator is no longer
      skipped (``a==b`` keeps ``b``).
    * ``-=`` is reported as one ``double-op`` token, like ``+=``.
    * ``/*/`` is not treated as a complete block comment: the ``*`` that
      opens the comment cannot also close it.
    """
    self.furnace = ''
    raw = self.raw
    length = len(raw)

    def emit(category, value):
        """Append one token to the result list."""
        self.bottle.append({"category": category, "value": value})

    def read_quoted(start, is_quote):
        """Read a quoted literal opening at ``start``.

        Returns ``(text, next_index)``. A backslash and the character
        after it are wrapped together in an ``escape`` <strong> tag.
        """
        parts = [raw[start]]
        j = start + 1
        while j < length and not is_quote(raw[j]):
            if self.is_slash(raw[j]):
                # The slice yields just the backslash when it is the very
                # last character of the input.
                parts.append("<strong class=\"escape\">" + raw[j:j + 2] + "</strong>")
                j += 2
            else:
                # NOTE(review): literal contents are not passed through
                # self.trans() the way comment text is -- confirm whether
                # that is intentional.
                parts.append(raw[j])
                j += 1
        if j < length:
            parts.append(raw[j])  # closing quote
            j += 1
        return ''.join(parts), j

    # An explicit index replaces the former "for + shadow pointer" pair:
    # each branch moves ``i`` to the first character it did not consume.
    i = 0
    while i < length:
        ch = raw[i]
        has_next = i + 1 < length

        # Spaces separate tokens and are dropped.
        if ch == ' ':
            i += 1

        # Keyword, '#'-directive or identifier. '#' may only appear as the
        # first character, so it is not accepted inside the scan loop.
        elif self.is_letter(ch) or self.is_underline(ch) or self.is_sharp(ch):
            end = i + 1
            while end < length and (
                self.is_letter(raw[end])
                or self.is_underline(raw[end])
                or self.is_digit(raw[end])
            ):
                end += 1
            word = raw[i:end]
            if word in self.keywords:
                emit("keyword", word)
            elif self.is_sharp(word[0]):
                emit("sharpe-special", word)
            else:
                emit("word", word)
            i = end

        elif self.is_separator(ch):
            emit("separator", ch)
            i += 1

        # '-' may start '->', '--' or '-='; otherwise it stands alone.
        elif self.is_dash(ch):
            follower = raw[i + 1] if has_next else ''
            if follower == '>':
                emit("arrow-special", ch + follower)
                i += 2
            elif follower == '-' or follower == '=':
                emit("double-op", ch + follower)
                i += 2
            else:
                emit("single-op", ch)
                i += 1

        # Number: digits with at most one decimal point. A second point
        # ends the number and is left for the next iteration.
        elif self.is_digit(ch):
            dot_allowed = True
            end = i + 1
            while end < length and (self.is_digit(raw[end]) or self.is_dot(raw[end])):
                if self.is_dot(raw[end]):
                    if not dot_allowed:
                        break
                    dot_allowed = False
                end += 1
            emit("number", raw[i:end])
            i = end

        elif self.is_tab(ch):
            emit("tab", " ")
            i += 1

        elif self.is_enter(ch):
            emit("enter", '<br>')
            i += 1

        # '/' starts a block comment, a line comment, or is a division sign.
        elif self.is_left_slash(ch):
            if has_next and raw[i + 1] == '*':
                # The two opening characters are stored verbatim; every
                # later character goes through self.trans().
                parts = [ch, '*']
                j = i + 1
                # ``j > i + 2`` keeps the opening '*' from being reused as
                # the closing one.
                while not (j > i + 2 and raw[j - 1] == '*' and self.is_left_slash(raw[j])):
                    j += 1
                    if j >= length:
                        break  # unterminated comment: keep what was read
                    parts.append(self.trans(raw[j]))
                emit("comment", ''.join(parts))
                i = j + 1
            elif has_next and self.is_left_slash(raw[i + 1]):
                parts = [ch, raw[i + 1]]
                j = i + 1
                # The terminating newline is consumed into the comment
                # token (via self.trans), so no separate "enter" token is
                # emitted for it.
                while not self.is_enter(raw[j]):
                    j += 1
                    if j >= length:
                        break  # comment runs to end of input
                    parts.append(self.trans(raw[j]))
                emit("comment", ''.join(parts))
                i = j + 1
            else:
                emit("single-op", ch)
                i += 1

        elif self.is_single_quote(ch):
            text, i = read_quoted(i, self.is_single_quote)
            emit("string", text)

        elif self.is_double_quote(ch):
            text, i = read_quoted(i, self.is_double_quote)
            emit("string", text)

        # Operators that are always emitted one character at a time.
        elif self.is_single_op(ch):
            emit("single-op", ch)
            i += 1

        # Operators that may be doubled ('++', '::') or followed by '='.
        elif self.is_double_op(ch):
            if has_next and (raw[i + 1] == ch or raw[i + 1] == '='):
                emit("double-op", ch + raw[i + 1])
                i += 2
            else:
                emit("single-op", ch)
                i += 1

        # Anything else is reported as illegal, one character at a time.
        else:
            emit("illegal", ch)
            i += 1