// Compiler Principles: lexical analysis program (C++)
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
/* Global state shared by the lexer */
std::ifstream infile;  // input stream holding the text to tokenize
std::ofstream outfile; // output stream receiving the token listing
char buf1[64];         // first half of the double buffer
char buf2[64];         // second half of the double buffer
char *p2;              // read cursor into buf1 / buf2
char ci;               // the single character most recently read from the input
std::string token;     // lexeme assembled from consecutive ci values
// Reserved words (taken from Baidu Baike); the trailing "NULL" entry was
// added by the original author.
std::string keyWord[] = {
    "asm", "do", "if", "return", "typedef", "auto", "double", "inline",
    "short", "typeid", "bool", "dynamic_cast", "int", "signed", "typename",
    "break", "else", "long", "sizeof", "union", "case", "enum", "mutable",
    "static", "unsigned", "catch", "explicit", "namespace", "static_cast",
    "using", "char", "export", "new", "struct", "virtual", "class", "extern",
    "operator", "switch", "void", "const", "false", "private", "template",
    "volatile", "const_cast", "float", "protected", "this", "wchar_t",
    "continue", "for", "public", "throw", "while", "default", "friend",
    "register", "true", "delete", "goto", "reinterpret_cast", "try", "NULL"};
// Number of entries in keyWord[]; computed from the array so the two cannot
// get out of sync (evaluates to 64 for the list above).
const int KEYWORDLENGTH = static_cast<int>(sizeof(keyWord) / sizeof(keyWord[0]));
/* End of global state */
/* Function declarations */
void start();                             // one-time setup performed at the top of main
void get_ci();                            // fetch the next character through the double buffer
bool isLetter(int);                       // is the argument an ASCII letter?
bool isDigit(int);                        // is the argument an ASCII digit?
bool isUnderLine(int);                    // is the argument an underscore?
bool isKeyWord(std::string);              // is the argument a reserved word?
char getType(char);                       // map a character to its class representative
void printFile(std::string, std::string); // write one result to the output file
void retractPoint();                      // move p2 back by one position
/* End of function declarations */
/**
 * Program entry point: tokenizes the input and reports each token through
 * printFile() together with its category.
 *
 * Every pass of the loop reads one character, classifies it with getType(),
 * and then consumes as many following characters as belong to the same
 * token. Branches that had to read one character past the token call
 * retractPoint() so that the next pass sees that character again.
 *
 * The loop has no exit of its own; get_ci() terminates the process once the
 * input is exhausted.
 */
int main()
{
    start(); // setup: open the files and preload the first buffer
    while (true)
    {
        token = ""; // start every token from an empty lexeme
        get_ci();
        switch (getType(ci))
        {
        case 'a': // getType() folds every letter to 'a'
        case '_':
        {
            // Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*
            token += ci;
            get_ci();
            while (isLetter(ci) || isDigit(ci) || isUnderLine(ci))
            {
                token += ci;
                get_ci();
            }
            if (isKeyWord(token))
            {
                printFile(token, "关键字");
            }
            else
            {
                printFile(token, "标识符");
            }
            retractPoint(); // the loop stopped on a character that is not ours
            break;
        }
        case '0': // getType() folds every digit to '0'
        {
            while (isDigit(ci) || ci == '.')
            {
                token += ci;
                get_ci();
            }
            printFile(token, "数值");
            retractPoint();
            break;
        }
        case '<':
        {
            get_ci();
            if (ci == '=')
            {
                printFile("<=", "运算符");
            }
            else if (ci == '<')
            {
                printFile("<<", "运算符");
            }
            else
            {
                printFile("<", "运算符");
                retractPoint();
            }
            break;
        }
        case '/':
        {
            get_ci();
            if (ci == '/')
            {
                // Line comment: discard everything up to the newline.
                do
                {
                    get_ci();
                } while (ci != '\n');
            }
            else
            {
                printFile("/", "运算符");
                retractPoint();
            }
            break;
        }
        case '+':
        {
            get_ci();
            if (ci == '=')
            {
                printFile("+=", "运算符");
            }
            else if (ci == '+')
            {
                printFile("++", "运算符");
            }
            else
            {
                printFile("+", "运算符");
                retractPoint();
            }
            break;
        }
        case '-':
        {
            token += ci; // keep the sign in case a numeric literal follows
            get_ci();
            if (ci == '=')
            {
                printFile("-=", "运算符");
            }
            else if (ci == '-')
            {
                printFile("--", "运算符");
            }
            else if (ci >= '0' && ci <= '9')
            {
                while (isDigit(ci) || ci == '.')
                {
                    token += ci;
                    get_ci();
                }
                printFile(token, "数值");
                retractPoint();
            }
            else
            {
                printFile("-", "运算符");
                retractPoint();
            }
            break;
        }
        case '*':
        {
            get_ci();
            if (ci == '=')
            {
                printFile("*=", "运算符");
            }
            else
            {
                printFile("*", "运算符");
                retractPoint();
            }
            break;
        }
        case '=':
        {
            get_ci();
            if (ci == '=')
            {
                printFile("==", "运算符");
            }
            else
            {
                printFile("=", "运算符");
                retractPoint();
            }
            break;
        }
        case '>':
        {
            get_ci();
            if (ci == '=')
            {
                printFile(">=", "运算符");
            }
            else if (ci == '>')
            {
                printFile(">>", "运算符");
            }
            else
            {
                printFile(">", "运算符");
                retractPoint();
            }
            break;
        }
        case '(':
        case ')':
        case '[':
        case ']':
        case ';':
        case '.':
        case ',':
        case '{':
        case '}':
        case '#':
            // Single-character delimiters; getType() returned ci unchanged,
            // so ci is exactly the matched character.
            printFile(std::string(1, ci), "界符");
            break;
        case ' ': // whitespace separates tokens and produces no output
        case '\n':
        case '\t':
            break;
        default: // any other character is skipped silently
            break;
        }
    }
    // Not reached: the loop above never breaks out on its own.
    infile.close();
    outfile.close();
    std::system("pause");
    return 0;
}
/************************************************************************//*以下是子函数的定义*//************************************************************************/voidstart()
{//此函数用于初始化
cout
token="";//初始化taken
p2=buf1;//指针预指向
buf1[63]=-1;//每个缓冲数据结尾为-1
buf2[63]=-1;
infile.open("in.txt",ios::in);//打开文件
if(!infile)
{
cout
system("pause");
exit(-1);
}
outfile.open("out.txt",ios::out);//输出文件,方式:覆盖
for(inti=0;i
{
infile.get(buf1[i]);
}
}
voidget_ci()
{
if(*p2==-1)//p2指向某一个缓冲buf的结尾处时
{
if(p2==buf1+63)//p2指针的值=buf1的地址+63->说明p2位于第一缓冲区结尾{
for(inti=0;i
{//读取文件到buf2
buf2[i]=infile.get();
p2=buf2;
get_ci();//递归调用自身(只一层而已)
}
}
elseif(p2==buf2+63)//p2指向buf2的结尾时
{
for(inti=0;i
{//读取文件到buf1
buf1[i]=infile.get();
p2=buf1;
get_ci();//递归调用自身(只一层而已)
}
}
else//p2没有指向buf1的结尾,也没有指向buf2的结尾,说明文件读取结束了{
infile.close();
outfile.close();
cout
system("pause");
exit(-1);
}
}
else
{
ci=*p2;
p2=p2+1;
}
}
/**
 * Tells whether `c` is an ASCII letter.
 *
 * @param c character code to test
 * @return true for 'A'..'Z' and 'a'..'z', false for everything else
 */
bool isLetter(int c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
/**
 * Tells whether `c` is an ASCII decimal digit.
 *
 * @param c character code to test
 * @return true for '0'..'9', false for everything else
 */
bool isDigit(int c)
{
    return c >= '0' && c <= '9';
}
/**
 * Tells whether `c` is the underscore character.
 *
 * @param c character code to test
 * @return true only for '_'
 */
bool isUnderLine(int c)
{
    return c == '_';
}
boolisKeyWord(strings)//判断是否是关键字{
for(inti=0;i
if(s==keyWord[i])
{
returntrue;
}
}
returnfalse;
}
voidprintFile(strings1,strings2)//输出至文件{
outfile
}
voidretractPoint()//p2指针前移撤销一位{
if(p2==buf1)//若当前指针指向buf1的头{
p2=buf2+63;//则重置为buf2的末尾-1
}
else
{
if(p2==buf2)//若当前指向buf2的头
{
p2=buf1+63;
}
else
p2--;
}
}
/**
 * Maps a character to the representative of its class, so that a switch can
 * treat all letters and all digits with one label each.
 *
 * @param c character to classify
 * @return 'a' for any ASCII letter, '0' for any ASCII digit, and `c` itself
 *         for every other character
 */
char getType(char c)
{
    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
    {
        return 'a';
    }
    if (c >= '0' && c <= '9')
    {
        return '0';
    }
    return c;
}