词法分析器的功能和输出格式 词法分析器的功能是读入测试源程序,输出单词序列。词法分析器的单词常表示成二元式:(单词种别码,单词在源代码中的字符串)。
词法分析器需要把对象语言的词法规则全部描述出来,在这里我们取 C 语言子集,它的词法定义如下:
(1)关键字 KEYWORD if else int return void while …… 所有的关键字都是小写。
(2)标识符 IDENTIFIER 标识符的定义采取正则表达式定义方式,标识符由字母、数字、下划线“_” 组成,并且首字母不能是数字。
(3)分隔符 SEPARATER ; , { } [ ] ( )
(4)运算符 OPERATOR + - * / > < =
(5)常数 CONSTANT 简化为只表示整型常数。
(6)词法分析过滤的字符 空格“ ”、制表符“\t”、回车“\r” 和换行符“\n”。
程序的输入要从文件中获取,所以我们要从指定的文件中读取内容,示例内容如下:
void main() { while(a>45){ int x, x1; x=x1+1; } return 0; }
词法分析程序输出如下:
Reading from standard input... 17 void 12 main 46 ( 47 ) 48 { 20 while 46 ( 53 a 34 > 7 45 47 ) 48 { 13 int 53 x 44 , 53 x1 45 ; 53 x 37 = 53 x1 30 + 7 1 45 ; 49 } 14 return 7 0 45 ; 49 }
主要就是要对jjt文件进行添加和修改,在TOKEN部分定义关键字、分隔符、运算符等词法定义;其次就是为了能够从文件中进行读取,我们需要修改main()方法实例化词法分析器对象,并实现扫描和输出单词序列。
不过模板中已经默认定义了大部分基础的TOKEN,主要工作还是补充额外的关键字定义和各类符号的定义。
TOKEN部分代码如下:
// Lexical specification for the C-subset lexer.
// JavaCC picks the longest match; when several expressions match the same
// length, the one declared EARLIEST wins. The declaration order below is
// therefore significant and must not be rearranged casually.

TOKEN : /* LITERALS */
{
  // Integer literal: decimal, hexadecimal or octal, each with an optional
  // l/L suffix.
  < INTEGER_LITERAL :
    < DECIMAL_LITERAL > ([ "l", "L" ])?
  | < HEX_LITERAL > ([ "l", "L" ])?
  | < OCTAL_LITERAL > ([ "l", "L" ])?
  >
  // The '#' prefix marks a private helper expression: it can be referenced
  // from other token definitions but is never produced as a token itself.
| < #DECIMAL_LITERAL : [ "1"-"9" ] ([ "0"-"9" ])* >
| < #HEX_LITERAL : "0" [ "x", "X" ] ([ "0"-"9", "a"-"f", "A"-"F" ])+ >
| < #OCTAL_LITERAL : "0" ([ "0"-"7" ])* >
}

// One or more digits, optionally followed by "." and more digits.
// NOTE(review): for a plain integer such as 45 both INTEGER_LITERAL and
// CONSTANT match the same text; INTEGER_LITERAL is declared first, so it is
// the kind that gets reported. CONSTANT only wins when it matches a longer
// text (e.g. a value with a fractional part).
TOKEN :
{
  < CONSTANT :
    (< DIGIT >)+
    (
      "." (< DIGIT >)+
    )?
  >
}

// Keywords are declared before IDENTIFIER so that, on an equal-length match,
// the keyword kind is chosen instead of IDENTIFIER.
TOKEN : /* KEYWORDS */
{
  < MAIN : "main" >
| < INT : "int" >
| < RETURN :"return" >
| < IF:"if" >
| < ELSE:"else" >
| < VOID : "void" >
| < DOUBLE:"double" >
| < FLOAT:"float" >
| < WHILE:"while" >
| < DO:"do" >
| < FOR:"for" >
| < CHAR:"char" >
| < STRING:"string" >
| < BOOL:"bool" >
| < BREAK:"break" >
| < SWITCH:"switch" >
| < CASE:"case" >
| < DEFAULTS:"default" >
}

// Arithmetic, relational and logical operators. Two-character operators
// (">=", "<=", "==") are preferred over their one-character prefixes by the
// longest-match rule.
// NOTE(review): the token named SQRT matches the caret "^" — confirm the
// intended meaning of the name.
TOKEN: /*OPERATORS*/
{
  < PLUS : "+" >
| < MINUS : "-" >
| < MULTIPLY : "*" >
| < DIVIDE : "/" >
| < GD:">" >
| < LD:"<" >
| < SQRT:"^" >
| < EQ:"=" >
| < GE:">=" >
| < LE:"<=" >
| < EQQ:"==" >
| < NE:"!=" >
| < OR:"||" >
| < AND:"&&" >
}

// Punctuation: comma, semicolon, parentheses, braces, brackets and colon.
TOKEN: /* SEPARATER */
{
  < COMMA:"," >
| < SEMICOLON:";" >
| < LB:"(" >
| < RB:")" >
| < BLB:"{" >
| < BRB:"}" >
| < LBB:"[" >
| < RBB:"]" >
| < COLON:":" >
}

// An identifier starts with a letter or underscore and continues with
// letters, underscores or digits. LETTER and DIGIT are private helpers.
TOKEN : /* IDENTIFIERS */
{
  < IDENTIFIER :
    < LETTER >
    (
      < LETTER >
    | < DIGIT >
    )* >
| < #LETTER : [ "_", "a"-"z", "A"-"Z" ] >
| < #DIGIT : [ "0"-"9" ] >
}
main方法代码如下:
package xx; import java.io.FileInputStream; import java.io.FileReader; import java.io.FileNotFoundException; public class AAA { public static void main(String args []) { System.out.println("Reading from standard input..."); AAA parser = new AAA(System.in); try { //String file = "./output.txt"; // String file = "./zeroone.txt"; String file = "./test.txt"; FileInputStream fin = new FileInputStream(file); SimpleCharStream scs = new SimpleCharStream(fin); AAATokenManager tm = new AAATokenManager(scs); Token to; int i = 0; while((to=tm.getNextToken()).kind!=0) { //to=tm.getNextToken(); //if(to.kind==0)break; System.out.println(to.kind+"\t"+to.toString()); } SimpleNode n = parser.Start(); n.dump(""); System.out.println("Thank you."); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (Exception e) { System.out.println("Oops."); System.out.println(e.getMessage()); } } }
.jjt完整代码如下:
/**
 * JJTree template file created by SF JavaCC plugin 1.5.28+ wizard for JavaCC 1.5.0+
 *
 * Lexer exercise for a small C subset: main() tokenizes ./test.txt and prints
 * every token as "<kind>\t<image>", then runs the template's expression
 * parser.
 */
options
{
  // Generate instance (non-static) parser and token manager members.
  static = false;
}

PARSER_BEGIN(AAA)
package xx;

import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileNotFoundException;

public class AAA
{
  /**
   * Tokenizes ./test.txt, printing one "<kind>\t<image>" line per token, then
   * runs the Start() production and dumps the resulting tree.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String args [])
  {
    System.out.println("Reading from standard input...");
    // NOTE(review): the parser is bound to System.in, so the Start() call
    // below reads from standard input, not from the file tokenized here.
    AAA parser = new AAA(System.in);
    try
    {
      String file = "./test.txt";

      // try-with-resources guarantees the file handle is released even when
      // the token manager throws part-way through the scan.
      try (FileInputStream fin = new FileInputStream(file))
      {
        SimpleCharStream scs = new SimpleCharStream(fin);
        AAATokenManager tm = new AAATokenManager(scs);
        Token to;
        // Kind 0 is the end-of-file token in JavaCC-generated token managers.
        while ((to = tm.getNextToken()).kind != 0)
        {
          System.out.println(to.kind + "\t" + to.toString());
        }
      }

      SimpleNode n = parser.Start();
      n.dump("");
      System.out.println("Thank you.");
    }
    catch (FileNotFoundException e)
    {
      e.printStackTrace();
    }
    catch (Exception e)
    {
      System.out.println("Oops.");
      System.out.println(e.getMessage());
    }
  }
}
PARSER_END(AAA)

// Characters and comments that are discarded by the lexer.
// NOTE: token kinds are numbered in declaration order (SKIP entries included),
// so adding, removing or reordering entries changes the kind numbers printed
// by main().
SKIP :
{
  " "
| "\t"
| "\n"
| "\r"
  // Line comment. The line terminator is optional so that a "//" comment on
  // the very last line of a file without a trailing newline is still skipped
  // instead of raising a lexical error.
| < "//" (~[ "\n", "\r" ])* ( "\n" | "\r" | "\r\n" )? >
  // Block comment.
| < "/*" (~[ "*" ])* "*" ( ~[ "/" ] (~[ "*" ])* "*" )* "/" >
}

// JavaCC picks the longest match; when several expressions match the same
// length, the one declared EARLIEST wins. The order of the TOKEN sections
// below is therefore significant.

TOKEN : /* LITERALS */
{
  // Integer literal: decimal, hexadecimal or octal, each with an optional
  // l/L suffix.
  < INTEGER_LITERAL :
    < DECIMAL_LITERAL > ([ "l", "L" ])?
  | < HEX_LITERAL > ([ "l", "L" ])?
  | < OCTAL_LITERAL > ([ "l", "L" ])?
  >
  // The '#' prefix marks a private helper expression that is never produced
  // as a token itself.
| < #DECIMAL_LITERAL : [ "1"-"9" ] ([ "0"-"9" ])* >
| < #HEX_LITERAL : "0" [ "x", "X" ] ([ "0"-"9", "a"-"f", "A"-"F" ])+ >
| < #OCTAL_LITERAL : "0" ([ "0"-"7" ])* >
}

// One or more digits, optionally followed by "." and more digits.
// NOTE(review): for a plain integer such as 45 both INTEGER_LITERAL and
// CONSTANT match the same text; INTEGER_LITERAL is declared first, so it is
// the kind that gets reported. CONSTANT only wins on a longer match.
TOKEN :
{
  < CONSTANT :
    (< DIGIT >)+
    (
      "." (< DIGIT >)+
    )?
  >
}

// Keywords are declared before IDENTIFIER so that, on an equal-length match,
// the keyword kind is chosen instead of IDENTIFIER.
TOKEN : /* KEYWORDS */
{
  < MAIN : "main" >
| < INT : "int" >
| < RETURN :"return" >
| < IF:"if" >
| < ELSE:"else" >
| < VOID : "void" >
| < DOUBLE:"double" >
| < FLOAT:"float" >
| < WHILE:"while" >
| < DO:"do" >
| < FOR:"for" >
| < CHAR:"char" >
| < STRING:"string" >
| < BOOL:"bool" >
| < BREAK:"break" >
| < SWITCH:"switch" >
| < CASE:"case" >
| < DEFAULTS:"default" >
}

// Arithmetic, relational and logical operators. Two-character operators are
// preferred over their one-character prefixes by the longest-match rule.
// NOTE(review): the token named SQRT matches the caret "^" — confirm the
// intended meaning of the name.
TOKEN: /*OPERATORS*/
{
  < PLUS : "+" >
| < MINUS : "-" >
| < MULTIPLY : "*" >
| < DIVIDE : "/" >
| < GD:">" >
| < LD:"<" >
| < SQRT:"^" >
| < EQ:"=" >
| < GE:">=" >
| < LE:"<=" >
| < EQQ:"==" >
| < NE:"!=" >
| < OR:"||" >
| < AND:"&&" >
}

// Punctuation: comma, semicolon, parentheses, braces, brackets and colon.
TOKEN: /* SEPARATER */
{
  < COMMA:"," >
| < SEMICOLON:";" >
| < LB:"(" >
| < RB:")" >
| < BLB:"{" >
| < BRB:"}" >
| < LBB:"[" >
| < RBB:"]" >
| < COLON:":" >
}

// An identifier starts with a letter or underscore and continues with
// letters, underscores or digits. LETTER and DIGIT are private helpers.
TOKEN : /* IDENTIFIERS */
{
  < IDENTIFIER :
    < LETTER >
    (
      < LETTER >
    | < DIGIT >
    )* >
| < #LETTER : [ "_", "a"-"z", "A"-"Z" ] >
| < #DIGIT : [ "0"-"9" ] >
}

// ---------------------------------------------------------------------------
// Parser productions: a single arithmetic expression terminated by ";".
// ---------------------------------------------------------------------------

// Entry production: parses one expression followed by ";" and returns the
// root node of the tree built by JJTree.
SimpleNode Start() :
{}
{
  Expression() ";"
  {
    return jjtThis;
  }
}

void Expression() :
{}
{
  AdditiveExpression()
}

// Left-to-right chain of "+" / "-" over multiplicative expressions.
void AdditiveExpression() :
{}
{
  MultiplicativeExpression()
  (
    (
      "+"
    | "-"
    )
    MultiplicativeExpression()
  )*
}

// Left-to-right chain of "*" / "/" / "%" over unary expressions.
// Note: "%" has no named entry in the TOKEN sections above; it is only
// introduced here as a string literal.
void MultiplicativeExpression() :
{}
{
  UnaryExpression()
  (
    (
      "*"
    | "/"
    | "%"
    )
    UnaryExpression()
  )*
}

// A parenthesised expression, an identifier, or an integer literal.
void UnaryExpression() :
{}
{
  "(" Expression() ")"
| Identifier()
| Integer()
}

void Identifier() :
{}
{
  < IDENTIFIER >
}

void Integer() :
{}
{
  < INTEGER_LITERAL >
}