一个面向对象语言的编译器

源代码在线查看: scanner.l

软件大小: 261 K
上传用户: wuweixiong123
关键词: 语言 编译器
下载地址: 免注册下载 普通下载 VIP

相关代码

				/*
				 * File:  scanner.l
				 * ----------------
				 * Lex input file to generate the scanner for the compiler.
				 */

%{

#include "scanner.h"
#include "utility.h" // for PrintDebug()
#include "errors.h"
#include <string.h>  // for strncpy(), strdup()
#include "parser.h"

#define TAB_SIZE 8

/* Global variables
 * ----------------
 * (For shame!) But we need a few to keep track of things that are
 * preserved between calls to yylex or used outside the scanner.
 */
static int curLineNum, curColNum;
static char curLine[512];

static void DoBeforeEachAction();
#define YY_USER_ACTION DoBeforeEachAction();

%}

/* States
 * ------
 * Our strategy for handling nested comments uses two lex states (N & C)
 * N = Normal (not inside a comment, we start in this state)
 * C = Comment (currently inside a comment)
 * Both are inclusive states (i.e. apply when explicitly named or none named)
 * Most rules will trigger when in normal mode, e.g. processing keywords
 * and identifiers only happens outside a comment. A few rules apply
 * when inside a comment (end-comment, EOF), and a few rules are used
 * in both states (ignoring whitespace, counting newlines, starting
 * another comment).  To track nesting depth, we turn on the stack option
 * so we can use lex's state stack. Each time we find a comment start,
 * we push a comment state, each time we find an end-comment, we pop.
 * Eventually this will return to the normal state in which we started.
 * (We could also have tracked this with our own integer counter).
 * Another little wrinkle on states is the COPY exclusive state which
 * I added to first match each line and copy it to a saved buffer
 * before re-processing it. This allows us to print the entire line
 * to provide context on errors.
 */
%option stack
%s N C
%x COPY

/* Definitions
 * -----------
 * To make our rules more readable, we establish some definitions here.
 */
DIGIT             ([0-9])
HEX_DIGIT         ([0-9a-fA-F])
HEX_INTEGER       (0[Xx]{HEX_DIGIT}+)
INTEGER           ({DIGIT}+)
EXPONENT          ([Ee][-+]?{INTEGER})
DOUBLE            ({INTEGER}"."{DIGIT}*{EXPONENT}?)
BEG_STRING        (\"[^"\n]*)
STRING            ({BEG_STRING}\")
IDENTIFIER        ([a-zA-Z][a-zA-Z_0-9]*)
OPERATOR          ([+\-*/%=\\.,;!<>()\[\]{}])
BEG_COMMENT       ("/*")
END_COMMENT       ("*/")
SINGLE_COMMENT    ("//"[^\n]*)

%%             /* BEGIN RULES SECTION */

 /* ------------- Line copying and position tracking -------------- */
<COPY>.*               { /* Save the whole upcoming line for error context.
                          * strncpy() does not terminate on truncation, so
                          * terminate explicitly for lines >= sizeof(curLine).
                          */
                         strncpy(curLine, yytext, sizeof(curLine) - 1);
                         curLine[sizeof(curLine) - 1] = '\0';
                         /* undo the column advance made by YY_USER_ACTION,
                          * then push the line back to be scanned for real */
                         curColNum = 1;
                         yy_pop_state(); yyless(0); }
<COPY><<EOF>>          { yy_pop_state();}
<*>\n                  { curLineNum++; curColNum = 1;
                         if (YYSTATE != COPY) yy_push_state(COPY); }

[ ]+                   { /* ignore all spaces in normal or comment */  }
[\t]                   { /* NOTE(review): curColNum has already been advanced
                          * past the tab by DoBeforeEachAction(); confirm this
                          * lands on the intended tab stop for every column. */
                         curColNum += TAB_SIZE - curColNum%TAB_SIZE + 1; }

 /* -------------------- Comments ----------------------------- */
{BEG_COMMENT}          { yy_push_state(C); }
<C>{END_COMMENT}       { yy_pop_state(); }
<C><<EOF>>             { ReportError(&yylloc, err_unterm_comment);
                         return 0; }
<C>[^*\n\t/]*          { /* grab all non-star, non-slash, non-newline */}
<C>.                   { /* ignore everything else that doesn't match */ }
<N>{SINGLE_COMMENT}    { /* skip to end of line for // comment */ }

 /* --------------------- Keywords ------------------------------- */
<N>"void"              { return T_Void;        }
<N>"int"               { return T_Int;         }
<N>"double"            { return T_Double;      }
<N>"bool"              { return T_Bool;        }
<N>"string"            { return T_String;      }
<N>"null"              { return T_Null;        }
<N>"class"             { return T_Class;       }
<N>"extends"           { return T_Extends;     }
<N>"this"              { return T_This;        }
<N>"while"             { return T_While;       }
<N>"for"               { return T_For;         }
<N>"if"                { return T_If;          }
<N>"else"              { return T_Else;        }
<N>"return"            { return T_Return;      }
<N>"break"             { return T_Break;       }
<N>"New"               { return T_New;         }
<N>"NewArray"          { return T_NewArray;    }
<N>"Print"             { return T_Print;       }
<N>"ReadInteger"       { return T_ReadInteger; }
<N>"ReadLine"          { return T_ReadLine;    }

 /* -------------------- Operators ----------------------------- */
<N>"<="                { return T_LessEqual;   }
<N>">="                { return T_GreaterEqual;}
<N>"=="                { return T_Equal;       }
<N>"!="                { return T_NotEqual;    }
<N>"&&"                { return T_And;         }
<N>"||"                { return T_Or;          }
<N>{OPERATOR}          { return yytext[0];     }

 /* -------------------- Constants ------------------------------ */
<N>"true"|"false"      { yylval.boolConstant = (yytext[0] == 't');
                         return T_BoolConstant; }
<N>{INTEGER}           { yylval.integerConstant = strtol(yytext, NULL, 10);
                         return T_IntConstant; }
<N>{HEX_INTEGER}       { yylval.integerConstant = strtol(yytext, NULL, 16);
                         return T_IntConstant; }
<N>{DOUBLE}            { yylval.doubleConstant = atof(yytext);
                         return T_DoubleConstant; }
<N>{STRING}            { /* the copy includes the surrounding quotes */
                         yylval.stringConstant = strdup(yytext);
                         return T_StringConstant; }
<N>{BEG_STRING}        { ReportError(&yylloc, err_unterm_string, yytext); }

 /* -------------------- Identifiers --------------------------- */
<N>{IDENTIFIER}        {
                        /* The lexer records the identifier name in yylval.
                         * The parser is responsible for looking up the name
                         * in the appropriate scope(s) to find the decl.
                         * Names longer than the buffer are truncated; the
                         * explicit terminator keeps the result a valid
                         * C string in that case.
                         */
                         strncpy(yylval.identifier, yytext,
                                 sizeof(yylval.identifier)-1);
                         yylval.identifier[sizeof(yylval.identifier)-1] = '\0';
                         return T_Identifier; }

 /* -------------------- Default rule (error) -------------------- */
<N>.                   { ReportError(&yylloc, err_unrecog_char, yytext[0]); }

%%

/*
 * Function: yywrap()
 * ------------------
 * Called by the generated scanner when it reaches end of input. Returning
 * 1 tells it there is no further input to continue with.
 */
int yywrap()
{
    return 1;
}

/*
 * Function: Inityylex()
 * --------------------
 * This function will be called before any calls to yylex().  It is designed
 * to give you an opportunity to do anything that must be done to initialize
 * the scanner (set global variables, configure starting state, etc.). One
 * thing it already does for you is assign the value of the global variable
 * yy_flex_debug that controls whether flex prints debugging information
 * about each token and what rule was matched. If set to false, no information
 * is printed. Setting it to true will give you a running trail that might
 * be helpful when debugging your scanner. Please be sure the variable is
 * set to false when submitting your final version.
 */
void Inityylex()
{
    PrintDebug("lex", "Initializing scanner");
    yy_flex_debug = false;
    BEGIN(N);  // Start in Normal state
    yy_push_state(COPY);  // but copy first line at start
    curLineNum = 1;
    curColNum = 1;
}


/*
 * Function: DoBeforeEachAction()
 * ------------------------------
 * This function is installed as the YY_USER_ACTION. This is a place
 * to group code common to all actions.
 * On each match, we fill in the fields to record its location and
 * update our column counter.
 */
static void DoBeforeEachAction()
{
    yylloc.first_line = curLineNum;
    yylloc.first_column = curColNum;
    yylloc.last_column = curColNum + yyleng - 1;
    curColNum += yyleng;
}

/* Function: GetLineNumbered()
 * ---------------------------
 * Returns string with contents of line numbered n or NULL if the
 * contents of that line are no longer available. Basically only the
 * line currently being scanned is available, although we could keep
 * a few lines back if we put more effort into it :-). The pointer
 * returned is to an internally maintained static buffer which will
 * be overwritten. If you want to preserve, be sure to copy elsewhere.
 */
const char *GetLineNumbered(int num) {
    return (num == curLineNum) ? curLine : NULL;
}

相关资源