System Programming Lab: LEX: Lexical Analyser Generator
System Programming Lab: LEX: Lexical Analyser Generator
The input notation for the Lex tool is referred to as the Lex language and the tool itself is the Lex
compiler. Behind the scenes, the Lex compiler transforms the input patterns into a transition diagram and
generates code, in a file called lex.yy.c, that simulates this transition diagram.
%%
Translation rules
%%
Auxiliary functions
Declaration Section:
%{
Declaration
%}
Regular definition
%%
Translational rules:
Pattern {Action}
%%
Each pattern is a regular expression, which may use the regular definitions of the
declaration section. The actions are fragments of code, typically written in C.
13CSL68 Page 1
SYSTEM PROGRAMMING LAB
Auxiliary functions:
The third section holds whatever additional functions are used in the actions. Alternatively, these
functions can be compiled separately and loaded with the lexical analyser.
The lex command reads files or standard input and generates a C program in a file
named lex.yy.c, which is a compilable C language program. The generated program contains a
function named yylex.
The generated lex.yy.c is compiled by the C compiler into a file called a.out.
C
compiler
lex.yy.c a.out
a.out
input output
Commands are:
lex file1.l /*creates lex.yy.c*/
cc lex.yy.c -ll /*link library files and creates a.out*/
./a.out /*execute the program*/
13CSL68 Page 2
SYSTEM PROGRAMMING LAB
Some of the regular expressions are given along with the description:
Examples:
Regular expression to match
1. Identifiers
[a-zA-Z][a-zA-Z0-9]*
2. Positive floating point number
([+]?[0-9]*\.[0-9]+)|([+]?[0-9]+\.[0-9]*)
3. Vowels
[aeiou]
4. Non alphanumeric characters
[^a-zA-Z0-9]
13CSL68 Page 3
SYSTEM PROGRAMMING LAB
PART-A PROGRAMS:LEX:
1) Program to count the number of characters, words, spaces and lines in a given input
file:
%{
/*
 * Counts the words, characters, blank spaces and lines of the file named
 * on the command line.
 *   wc - words (maximal runs of characters other than blank, tab, newline)
 *   cc - characters (word characters plus blanks and tabs; newlines are
 *        not added to this total)
 *   sc - blank spaces
 *   lc - newlines
 * A tab has its own rule: without one it would reach lex's default rule,
 * be echoed to the terminal and never be counted.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int wc=0,sc=0,cc=0,lc=0;
%}
%%
[^ \t\n]+ {wc++; cc+=yyleng;}
[ ] {sc++; cc++;}
[\t] {cc++;}
[\n] {lc++;}
%%
/* Opens argv[1] as the scanner input, scans it and prints the four totals. */
int main(int argc,char *argv[])
{
    if(argc<=1)
    {
        printf("filename not entered\n");
        exit(1);
    }
    yyin=fopen(argv[1],"r");
    if(yyin==NULL)
    {
        printf("file does not exist\n");
        exit(1);
    }
    yylex();
    fclose(yyin);
    printf("word=%d,\ncharacter=%d,\nline=%d,\nspace=%d\n",wc,cc,lc,sc);
    return 0;
}
output:
[oslab@localhost 15CS031]$ cat 1aa.l
Hello good morning
welcome to cs department
ssit Tumkur
[oslab@localhost 15CS031]$ lex 1a.l
[oslab@localhost 15CS031]$ cc lex.yy.c -ll
[oslab@localhost 15CS031]$ ./a.out
filename not entered
[oslab@localhost 15CS031]$ ./a.out 1aa.l
word=10,
character=50,
line=3,
space=7
13CSL68 Page 4
SYSTEM PROGRAMMING LAB
2) Program to count the numbers of comment lines in a given C program also eliminate
them and copy the resulting program into separate file:
%{
/*
 * Removes comments from the C source file argv[1], writes the remaining
 * text to argv[2] and reports how many comments were removed.  Every "//"
 * comment and every block comment (however many lines it spans) counts
 * once.  String and character literals are copied through untouched so
 * that comment markers inside them are not mistaken for comments.
 */
#include<stdio.h>
#include<stdlib.h>
int cc=0;
%}
%%
\"([^"\\\n]|\\.)*\" {ECHO;}
'([^'\\\n]|\\.)*' {ECHO;}
"//"[^\n]* {cc++;}
"/*"([^*]|\*+[^*/])*\*+"/" {cc++;}
.|\n {ECHO;}
%%
/* argv[1] is the input program, argv[2] the comment-free copy to create. */
int main(int argc,char *argv[])
{
    if(argc<=2)
    {
        printf("filename not entered\n");
        exit(1);
    }
    yyin=fopen(argv[1],"r");
    if(yyin==NULL)
    {
        printf("file does not exist\n");
        exit(1);
    }
    yyout=fopen(argv[2],"w");
    if(yyout==NULL)
    {
        printf("cannot create output file\n");
        fclose(yyin);
        exit(1);
    }
    yylex();
    fclose(yyin);
    fclose(yyout);
    printf("Number of comment lines=%d\n",cc);
    return 0;
}
output:
[oslab@localhost 15CS031]$ cat comment.c
#include<stdio.h>
#include<stdlib.h>
//main program
void main()
{
int a,b,c;
a=10;
b=20;
c=a+b;
13CSL68 Page 5
SYSTEM PROGRAMMING LAB
/* assigning
result to
variable c*/
printf(" c=%d",c);
}
[oslab@localhost 15CS031]$ lex 2a.l
[oslab@localhost 15CS031]$ cc lex.yy.c -ll
[oslab@localhost 15CS031]$ ./a.out
filename not entered
[oslab@localhost 15CS031]$ ./a.out comment.c comment1.c
Number of comment lines=2
[oslab@localhost 15CS031]$ cat comment1.c
#include<stdio.h>
#include<stdlib.h>
void main()
{
int a,b,c;
a=10;
b=20;
c=a+b;
printf(" c=%d",c);
}
13CSL68 Page 6
SYSTEM PROGRAMMING LAB
%{
/*
 * Recognises an arithmetic expression of the form
 *     operand (operator operand)+
 * where an operand is a number or an identifier and an operator is one of
 * + - * /.  For a valid expression the operators and operands are
 * collected, blank-separated, in opr[] and opd[] and printed at end of
 * input.  Any other line is reported as invalid and the program stops.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
#include<ctype.h>    /* isalnum() */
#define LIST_MAX 256
char opr[LIST_MAX],opd[LIST_MAX];
void scan();
int i=0,j=0;         /* used lengths of opr[] and opd[] */
%}
d [0-9]
l [a-zA-Z]
num {d}+
id {l}({l}|{d})*
opd {num}|{id}
opr [*+/-]
expr {opd}({opr}{opd})+
%%
{expr} {printf("valid expression\n"); scan();}
.* {printf("invalid expression\n"); exit(0);}
%%
int main()
{
    printf(" enter the expression");
    yylex();
    printf(" operators are: %s",opr);
    printf("operands are: %s",opd);
    return 0;
}

/* Appends ch to buf unless that would overwrite the final terminator. */
static void put(char *buf,int *len,char ch)
{
    if(*len<LIST_MAX-1)
        buf[(*len)++]=ch;
}

/*
 * Splits the matched expression in yytext into operators and operands.
 * The text has already matched {expr}, so every run of letters and digits
 * is exactly one operand and every other character is an operator.
 */
void scan()
{
    int k=0;
    while(k<yyleng)
    {
        if(isalnum((unsigned char)yytext[k]))
        {
            /* copy the whole operand, then one separating blank */
            while(k<yyleng && isalnum((unsigned char)yytext[k]))
                put(opd,&j,yytext[k++]);
            put(opd,&j,' ');
        }
        else
        {
            put(opr,&i,yytext[k++]);
            put(opr,&i,' ');
        }
    }
}
output:
2+r-9/2*4
valid expression
operators are: + - / *
operands are: 2 r 9 2 4
[oslab@localhost 15CS031]$ ./a.out
enter the expression
+a+
invalid expression
13CSL68 Page 8
SYSTEM PROGRAMMING LAB
%{
/*
 * Classifies the input as a compound sentence when it contains one of the
 * listed connective words, otherwise as a simple sentence.
 * The catch-all word rule makes the connectives match whole words only:
 * lex prefers the longest match, so "morning" is consumed as one word
 * instead of triggering on the "or" inside it.  Input is not echoed.
 */
#include<stdio.h>
#include<stdlib.h>
int flag=0;
%}
%%
"or"|"and"|"since"|"because"|"not"|"as"|"then" {flag=1;}
[a-zA-Z]+ ;
.|\n ;
%%
int main()
{
    printf("enter the sentence\n");
    yylex();
    if(flag==1)
        printf("\nCompound sentence\n");
    else
        printf("\nSimple sentence\n");
    return 0;
}
output:
[oslab@localhost 15CS031]$ vi 4a.l
[oslab@localhost 15CS031]$ lex 4a.l
[oslab@localhost 15CS031]$ cc lex.yy.c -ll
[oslab@localhost 15CS031]$ ./a.out
enter the sentence
this is sp lab
Simple sentence
[oslab@localhost 15CS031]$ ./a.out
enter the sentence
We have SP and J2EE lab
Compound sentence
13CSL68 Page 9
SYSTEM PROGRAMMING LAB
5)Program to recognize and count the number of identifiers in a given input file.
%{
/*
 * Counts the identifiers declared in declarations of the form
 *     type id, id, ... ;
 * (type is int, float, double or char) found in a file whose name is read
 * from standard input.
 * The {id} rule consumes other words whole, so the "int" inside a word
 * such as "print" cannot start a false declaration match.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int c=0;
void scan();
%}
d [0-9]
l [a-zA-Z]
ws [ \t]*
id {l}({l}|{d})*
type int|float|double|char
dec {type}[ \t]+{id}{ws}(","{ws}{id}{ws})*";"
%%
{dec} {scan();}
{id} ;
.|\n ;
%%
int main()
{
    char file[256];
    printf(" enter the filename\n");
    /* the field width keeps the name inside file[] */
    if(scanf("%255s",file)!=1)
    {
        printf("filename not entered\n");
        exit(1);
    }
    yyin=fopen(file,"r");
    if(yyin==NULL)
    {
        printf("file does not exist\n");
        exit(1);
    }
    yylex();
    fclose(yyin);
    printf("\nNumber of identifiers=%d\n",c);
    return 0;
}

/* A declaration holds one more identifier than it holds commas. */
void scan()
{
    int k;
    for(k=0;k<yyleng;k++)
        if(yytext[k]==',')
            c++;
    c++;
}
output:
[oslab@localhost 15CS031]$ cat 4aa.c
int a,b,c,d;
float k,j;
[oslab@localhost 15CS031]$ lex 4a.l
[oslab@localhost 15CS031]$ cc lex.yy.c -ll
[oslab@localhost 15CS031]$ ./a.out
enter the filename
4aa.c
Number of identifiers=6
13CSL68 Page 10
SYSTEM PROGRAMMING LAB
6) Program that copies a file, replacing each nonempty sequence of white spaces by a
single blank.
%{
/*
 * Copies argv[1] to argv[2], replacing every non-empty run of blanks,
 * tabs and newlines by a single blank.  All other characters reach lex's
 * default rule, which copies them to yyout unchanged.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
%}
%%
[ \n\t]+ {fprintf(yyout," ");}
%%
int main(int argc,char *argv[])
{
    if(argc<=2)
    {
        printf("\nInput and output file names required\n");
        exit(1);
    }
    yyin=fopen(argv[1],"r");
    if(yyin==NULL)
    {
        printf("\nFile does not exists\n");
        exit(1);
    }
    yyout=fopen(argv[2],"w");
    if(yyout==NULL)
    {
        printf("\nCannot create output file\n");
        fclose(yyin);
        exit(1);
    }
    yylex();
    fclose(yyin);
    fclose(yyout);
    return 0;
}
output:
SSIT college
Tumkur, Karnataka
[oslab@localhost 15CS031]$ lex 6a.l
[oslab@localhost 15CS031]$ cc lex.yy.c -ll
[oslab@localhost 15CS031]$ ./a.out 6aa.c 6bb.c
[oslab@localhost 15CS031]$ cat 6bb.c
Harshitha S SSIT college Tumkur, Karnataka
13CSL68 Page 11
SYSTEM PROGRAMMING LAB
Yacc:Parser Generator
Yacc is a tool that uses LALR parser which is used in the construction of the front end of
the compiler. Yacc stands for “yet another compiler compiler”. Yacc is available as a
command on the UNIX system, and has been used to help implement many production
compilers.
%%
Translation rules
%%
Supporting C routines
Declarations part:
%{
Declarations
%}
Declaration of tokens
%%
There are two sections in the declarations part of a Yacc program; both are optional. In the first
part, ordinary C declarations are put, delimited by %{ and %}. The second part is the
declaration of tokens.
Eg:
%{
#include<stdio.h>
%}
%token A B
%%
Translation rules part:
In the part of the Yacc specification after the first %% pair, the translation rules are put.
Each rule consists of a grammar production and the associated semantic action. A set
of productions, say:
<head>-><body>1|<body>2|….|<body>n
would be written in Yacc as
<head>:<body>1 {<semantic action>1;}
|<body>2 {<semantic action>2;}
……..
|<body>n {<semantic action>n;}
;
In a Yacc production, unquoted strings of letters and digits not declared to be tokens are taken to be
nonterminals. A quoted single character, e.g. 'c', is taken to be the terminal symbol c.
13CSL68 Page 12
SYSTEM PROGRAMMING LAB
Translate.y y.tab.c
C
Compiler
y.tab.c
a.out
input output
The above figure shows the execution of a Yacc program. First, a file, say translate.y, containing a
Yacc specification of the translator is prepared. The UNIX system command:
yacc translate.y
transforms the file translate.y into a C program called y.tab.c using the LALR method. The program
y.tab.c is a representation of an LALR parser written in C, along with other C routines. By
compiling y.tab.c using command
cc y.tab.c -ll
we obtain the desired object program a.out that performs the translation specified by the
original Yacc program.
Commands are
lex a1.l
yacc -d a1.y
cc lex.yy.c y.tab.c -ll
./a.out
13CSL68 Page 13
SYSTEM PROGRAMMING LAB
//1bl.l
%{
/*
 * Scanner for the variable-name checker.  Hands the parser one token per
 * character: D for a digit, L for a letter, the character itself for
 * anything else, and 0 (end of input) at the end of the line.
 */
#include "y.tab.h"
%}
digit [0-9]
letter [a-zA-Z]
%%
{digit} { return D; }
{letter} { return L; }
\n { return 0; }
. { return yytext[0]; }
%%
//1b.y
%{
/*
 * Accepts a variable name: one letter followed by any number of letters
 * or digits.  Tokens L (letter) and D (digit) come from the scanner.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int yylex(void);
int yyerror(const char *msg);
%}
%token D L
%%
var: L chars
;
chars: chars char
|
;
char: L
|D
;
%%
int main(void)
{
    printf("Enter a variable\n");
    yyparse();
    /* reached only on success: yyerror() ends the program on a syntax error */
    printf("Valid\n");
    return 0;
}

/* Called by the parser on a syntax error; reports it and stops the program. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("Invalid\n");
    exit(0);
}
output:
13CSL68 Page 15
SYSTEM PROGRAMMING LAB
//2bl.l
%{
/*
 * Scanner for the arithmetic-expression evaluator.  A run of digits
 * becomes the token NUM with its value in yylval; a newline ends the
 * input; every other character is returned as itself.
 */
#include<stdlib.h>   /* atoi() */
#include "y.tab.h"
extern int yylval;
%}
digit [0-9]
NUM {digit}+
%%
{NUM} {yylval=atoi(yytext);
return NUM;}
. return yytext[0];
\n return 0;
%%
//2b.y
%{
/*
 * Evaluates an integer arithmetic expression with + - * / and
 * parentheses, using the usual precedence (E: sums, T: products,
 * F: factors), and prints the result.
 */
#include<stdio.h>
int yylex(void);
int yyerror();
%}
%token NUM
%%
S:E {printf("Result=%d\n",$1);}
;
E:E'+'T {$$=$1+$3;}
|E'-'T {$$=$1-$3;}
|T {$$=$1;}
;
T:T'*'F {$$=$1*$3;}
|T'/'F {if($3==0)
        {
            /* never continue with an unset $$ after a division by zero */
            yyerror();
            YYABORT;
        }
        $$=$1/$3;}
|F {$$=$1;}
;
F:'('E')' {$$=$2;}
|NUM {$$=$1;}
;
%%
int main(void)
{
    printf("Enter the arithmetic expression\n");
    yyparse();
    return 0;
}
int yyerror()
{
13CSL68 Page 16
SYSTEM PROGRAMMING LAB
output:
13CSL68 Page 17
SYSTEM PROGRAMMING LAB
//3bl.l
%{
/*
 * Scanner for the a^n b^n recogniser: 'a' becomes token A, 'b' becomes
 * token B, a newline ends the input and any other character is passed to
 * the parser as itself.
 */
#include "y.tab.h"
%}
%%
"a" { return A; }
"b" { return B; }
\n { return 0; }
. { return yytext[0]; }
%%
//3b.y
%{
/*
 * Accepts strings of the form a^n b^n (n >= 0): every A must be balanced
 * by a later B.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int yylex(void);
int yyerror(const char *msg);
%}
%token A B
%%
S: A S B
|
;
%%
int main(void)
{
    printf("\nEnter a variable\n");
    yyparse();
    /* reached only on success: yyerror() ends the program on a syntax error */
    printf("\nValid\n");
    exit(0);
}

/* Called by the parser on a syntax error; reports it and stops the program. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("\nInvalid\n");
    exit(0);
}
output:
[oslab@localhost 15CS031]$ lex 3bl.l
[oslab@localhost 15CS031]$ yacc -d 3b.y
[oslab@localhost 15CS031]$ cc lex.yy.c y.tab.c -ll
[oslab@localhost 15CS031]$ ./a.out
Enter a variable
aaabbb
Valid
[oslab@localhost 15CS031]$ ./a.out
Enter a variable
abbb
Invalid
13CSL68 Page 18
SYSTEM PROGRAMMING LAB
//4bl.l
%{
/*
 * Scanner for the a^n b (n >= 10) recogniser.  A run of ten or more a's
 * is a single token A; a newline ends the input; any other character
 * (including an 'a' from a shorter run) is returned as itself.
 */
#include "y.tab.h"
%}
%%
a{10,} { return A; }
\n { return 0; }
. { return yytext[0]; }
%%
//4b.y
%{
/*
 * Accepts a run of at least ten a's (delivered by the scanner as the
 * single token A) followed by exactly one b.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int yylex(void);
int yyerror(const char *msg);
%}
%token A
%%
S:A'b'
;
%%
int main(void)
{
    printf("\nEnter the grammar\n");
    yyparse();
    /* reached only on success: yyerror() ends the program on a syntax error */
    printf("\nValid\n");
    return 0;
}

/* Called by the parser on a syntax error; reports it and stops the program. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("\nInvalid\n");
    exit(0);
}
output:
[oslab@localhost 15CS031]$ lex 4b1.l
[oslab@localhost 15CS031]$ yacc -d 4b.y
[oslab@localhost 15CS031]$ cc lex.yy.c y.tab.c -ll
[oslab@localhost 15CS031]$ ./a.out
Enter the grammar
ab
Invalid
[oslab@localhost 15CS031]$ ./a.out
Enter the grammar
aaaaaaaaaab
Valid
13CSL68 Page 19
SYSTEM PROGRAMMING LAB
5) Program that takes Boolean expressions as input and produces the truth value of the
expressions.
//5bl.l
%{
/*
 * Scanner for the Boolean-expression evaluator.  Numbers become NUM with
 * their value in yylval; logical and relational operators and the words
 * true/false become named tokens; a newline ends the input; every other
 * character is returned as itself.  Two-character operators win over
 * their one-character prefixes because lex prefers the longest match.
 */
#include<stdlib.h>   /* atoi() */
#include "y.tab.h"
extern int yylval;
%}
digit [0-9]
NUM {digit}+
%%
{NUM} {yylval=atoi(yytext); return NUM;}
"&&" return AND;
"||" return OR;
"!" return NOT;
"<" return LT;
">" return GT;
"<=" return LE;
">=" return GE;
"==" return EQ;
"!=" return NE;
true return TRUE;
false return FALSE;
. return yytext[0];
\n return 0;
%%
//5b.y
%{
/*
 * Declarations and start rule of the Boolean-expression evaluator.
 * Precedence rises from OR through AND and the arithmetic operators to
 * NOT and parentheses.
 */
#include<stdio.h>
int yylex(void);
%}
%token AND OR NOT LT GT LE GE EQ NE NUM TRUE FALSE
%left OR
%left AND
%left '+' '-'
%left '*' '/'
%nonassoc NOT '(' ')'
%%
s:B {if($1==1) printf("TRUE\n");
else printf("FALSE\n");}
;
B:B OR B {$$=$1||$3;}
|B AND B {$$=$1&&$3;}
|NOT B {$$=!$2;}
|'('B')' {$$=$2;}
|E LT E {if($1<$3) $$=1;
else $$=0;}
13CSL68 Page 20
SYSTEM PROGRAMMING LAB
output:
[oslab@localhost 15CS031]$ lex 5bl.l
[oslab@localhost 15CS031]$ yacc -d 5b.y
[oslab@localhost 15CS031]$ cc lex.yy.c y.tab.c -ll
[oslab@localhost 15CS031]$ ./a.out
Enter an expression
(2+3<=7)&&(5-3<=1)
FALSE
valid
[oslab@localhost 15CS031]$ ./a.out
enter the expression
!((5+1)>(6+5))
TRUE
valid
13CSL68 Page 21
SYSTEM PROGRAMMING LAB
//6b.l
%{
/*
 * Scanner for the w c w-reversed recogniser: the letters a, b and c
 * become the tokens A, B and C, a newline ends the input and any other
 * character is returned as itself.
 */
#include "y.tab.h"
%}
%%
"a" { return A; }
"b" { return B; }
"c" { return C; }
\n { return 0; }
. { return yytext[0]; }
%%
//6b.y
%{
/*
 * Accepts strings of the form w c w-reversed over {a,b}: matching A's or
 * B's wrapped symmetrically around a single C.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int yylex(void);
int yyerror(const char *msg);
%}
%token A B C
%%
S:A S A
|B S B
|C
;
%%
int main(void)
{
    printf("\nEnter the input\n");
    yyparse();
    /* reached only on success: yyerror() ends the program on a syntax error */
    printf("\nValid\n");
    return 0;
}

/* Called by the parser on a syntax error; reports it and stops the program. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("Invalid input\n");
    exit(0);
}
output:
[oslab@localhost 15CS031]$ yacc -d 6b.y
[oslab@localhost 15CS031]$ cc lex.yy.c y.tab.c -ll
[oslab@localhost 15CS031]$ ./a.out
Enter the input
aabcbba
Valid
[oslab@localhost 15CS031]$ ./a.out
enter the input
bbbaacaab
Invalid
13CSL68 Page 22
SYSTEM PROGRAMMING LAB
%{
/*
 * Lexical analyser for a small subset of C.  For every token it prints
 * the lexeme followed by its class and an attribute.
 *
 * Rule order matters: {keyword} and {RET} come before {id} because lex
 * breaks a tie between equally long matches in favour of the earlier
 * rule, and every keyword also matches {id}.
 *
 * NOTE(review): search() is only declared here; its definition (and
 * main(), if any) is not part of this listing.  From the {id} action it
 * appears to return 0 for an identifier not yet stored in p[] - confirm
 * against the missing code.
 */
#include<stdio.h>
#include<string.h>
int c=0;
int ch=0;
char p[20][20];
int i=0,j=0;
int search(char*);
%}
l [a-zA-Z]
d [0-9]
id {l}({l}|{d})*
OP_BR "("
CL_BR ")"
OP_F "{"
CL_F "}"
keyword "int"|"float"|"char"|"double"|"void"|"scanf"|"printf"|"if"|"else"|"switch"|"goto"|"main"|"break"|"case"|"default"
PD "#include"|"#define"
RET "return"
plus "+"
minus "-"
mul "*"
div "/"
assign "="
lt "<"
gt ">"
ge ">="
le "<="
ne "!="
eq "=="
num {d}+
literal ["][^"\n]*["]
%%
{OP_BR} {printf("%s",yytext);printf("\topen paranthesis\t");printf("null\n");}
{CL_BR} {printf("%s",yytext);printf("\tclose paranthesis\t");printf("null\n");}
{OP_F} {printf("%s",yytext);printf("\topen flower brace\t");printf("null\n");}
{CL_F} {printf("%s",yytext);printf("\tclose flower brace\t");printf("null\n");}
{num} {printf("%s",yytext);printf("\tnumber\t");printf("%s\n",yytext);}
{PD} {printf("%s",yytext);printf("\tPD\t");printf("null\n");}
{keyword} {printf("%s",yytext);printf("\tkeyword\t");printf("null\n");}
{RET} {printf("%s",yytext);printf("\ttreturn\t\t\t");printf("null\n");}
{id} {printf("%s",yytext);
      ch=search(yytext);
      if(ch==0)
      {
          /* store the new name only while it fits the 20 x 20 table */
          if(i<20 && yyleng<20)
              strcpy(p[i++],yytext);
          printf("null\n");
          printf("\t%d\n",++c);
      }
      else
      {
          printf("\tidentifiers\t");
          printf("\t%d\n",j);
      }}
{plus} {printf("%s",yytext);printf("\tarth_op\t");printf("plus\n");}
{minus} {printf("%s",yytext);printf("\tarth_op\t");printf("minusl\n");}
{mul} {printf("%s",yytext);printf("\tarth_op\t");printf("multiplication\n");}
{div} {printf("%s",yytext);printf("\tarth_op\t");printf("division\n");}
{lt} {printf("%s",yytext);printf("\trelop\t");printf("LT\n");}
{gt} {printf("%s",yytext);printf("\trelop\t");printf("GT\n");}
{le} {printf("%s",yytext);printf("\trelop\t");printf("LE\n");}
{ge} {printf("%s",yytext);printf("\trelop\t");printf("GE\n");}
{eq} {printf("%s",yytext);printf("\trelop\t");printf("EQ\n");}
{ne} {printf("%s",yytext);printf("\trelop\t");printf("NE\n");}
{assign} {printf("%s",yytext);printf("\tassign_op\t");printf("null\n");}
{literal} {printf("%s",yytext);printf("\tliteral\t\t");printf("null\n");}
;|, {printf("%s",yytext);printf("\tpuntuation\t");printf("\tnull\n");}
.|\n ;
%%
13CSL68 Page 24
SYSTEM PROGRAMMING LAB
output:
[oslab@localhost 15CS031]$ cat 1cc.c
#include<stdio.h>
void main()
{
int a=12,c=14;
a=a+c;
printf("simple prog");
}
[oslab@localhost 15CS031]$ lex 1c.l
[oslab@localhost 15CS031]$ cc lex.yy.c -ll
[oslab@localhost 15CS031]$ ./a.out
13CSL68 Page 25
SYSTEM PROGRAMMING LAB
2) Write a program to implement recursive descent parser for the expression grammar.
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
#include<unistd.h>
void E();
void T();
void Eprime();
void Tprime();
void F();
int i=0;
char expr[25];
/*
 * Reads one whitespace-free expression from standard input, parses it
 * starting from the non-terminal E and reports whether the parser
 * consumed the whole input.
 */
int main(void)
{
    printf("\n enter the expression\n");
    /* expr holds 25 bytes: accept at most 24 characters plus the terminator */
    if(scanf("%24s",expr)!=1)
    {
        printf("\n invalid expression\n");
        return 0;
    }
    E();
    if(expr[i]=='\0')
        printf("\n valid expression\n");
    else
        printf("\n invalid expression\n");
    return 0;
}
/* E -> T E' : a term followed by an optional chain of additive operators. */
void E()
{
    T();
    Eprime();
}
/* T -> F T' : a factor followed by an optional chain of multiplicative operators. */
void T()
{
    F();
    Tprime();
}
/*
 * E' -> + T E' | - T E' | epsilon
 * Consumes additive operators and the terms that follow them for as long
 * as the current character is '+' or '-'.
 */
void Eprime()
{
    while(expr[i]=='+' || expr[i]=='-')
    {
        i++;
        T();
    }
}
/*
 * T' -> * F T' | / F T' | epsilon
 * If the current character is '*' or '/', consumes it, parses the factor
 * that follows and looks for further multiplicative operators.
 */
void Tprime()
{
    if((expr[i]=='*')||(expr[i]=='/'))
    {
        i++;
        F();
        Tprime();
    }
}
/*
 * F -> ( E ) | identifier
 * An identifier is a letter followed by any number of letters or digits.
 * On a missing ')' or an unexpected character an error is printed and the
 * program exits.
 */
void F()
{
    if(expr[i]=='(')
    {
        i++;
        E();
        if(expr[i]!=')')
        {
            printf("\nerror");
            exit(0);
        }
        i++;
    }
    /* <ctype.h> functions need an unsigned char value; plain char may be negative */
    else if(isalpha((unsigned char)expr[i]))
    {
        while(isalnum((unsigned char)expr[i]))
            i++;
    }
    else
    {
        printf("\n error\n");
        exit(0);
    }
}
output:
valid expression
[oslab@localhost 15CS031]$ ./a.out
(k+m
invalid expression
13CSL68 Page 27
SYSTEM PROGRAMMING LAB
//3c.l
%{
/*
 * Scanner for the infix-to-postfix converter.  Operands (numbers and
 * identifiers) are printed as soon as they are scanned and reported as
 * NUM or ID; a newline ends the input; any other character is returned
 * as itself.
 */
#include "y.tab.h"
extern int yylval;
%}
number [0-9]+
name [a-zA-Z][a-zA-Z0-9]*
%%
{number} { fputs(yytext,stdout); return NUM; }
{name} { fputs(yytext,stdout); return ID; }
\n { return 0; }
. { return yytext[0]; }
%%
//3c.y
%{
/*
 * Converts an infix expression to postfix.  The scanner prints each
 * operand when it is read; each operator is printed here when its rule is
 * reduced, that is, after both of its operands.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int yylex(void);
int yyerror(const char *msg);
%}
%token NUM ID
%left '+''-'
%left '*''/'
%nonassoc '(' ')'
%%
E:E'+'E {printf("+");}
|E'-'E {printf("-");}
|E'*'E {printf("*");}
|E'/'E {printf("/");}
|'('E')'
|NUM
|ID
;
%%
int main(void)
{
    printf("\nenter infix expression:\n");
    yyparse();
    return 0;
}

/* Called by the parser on a syntax error; reports it and stops the program. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("\n error\n");
    exit(0);
}
13CSL68 Page 28
SYSTEM PROGRAMMING LAB
output
13CSL68 Page 29
SYSTEM PROGRAMMING LAB
4) Write a yacc program that accepts a regular expression as input and produces its
parse tree as output.
//4c.l
%{
/*
 * Scanner for the parse-tree printer: 'a' becomes token A, 'b' becomes
 * token B, a newline ends the input and any other character is returned
 * as itself.
 */
#include"y.tab.h"
%}
%%
"a" { return A; }
"b" { return B; }
\n { return 0; }
. { return yytext[0]; }
%%
//4c.y
%{
/*
 * Prints the productions used while parsing a string of the form
 * a^n b^n.  Each production is printed when it is reduced, so the empty
 * production appears first and the outermost one last.
 */
#include<stdio.h>
#include<stdlib.h>   /* exit() */
int yylex(void);
int yyerror(const char *msg);
%}
%token A B
%%
S: {printf("S->EPSILON\n");}
|A S B {printf("S->ASB\n");}
;
%%
int main(void)
{
    printf("\nenter the string\n");
    yyparse();
    /* reached only on success: yyerror() ends the program on a syntax error */
    printf("\naccept\n");
    return 0;
}

/* Called by the parser on a syntax error; reports it and stops the program. */
int yyerror(const char *msg)
{
    (void)msg;
    printf("\ninvalid\n");
    exit(0);
}
output
S->EPSILON
accept
[oslab@localhost 15CS031]$./a.out
s->ASB
s->ASB
invalid expression
13CSL68 Page 31
SYSTEM PROGRAMMING LAB
5) Write a C program to implement the syntax-directed definition of “if E then S”, “if E
then S1 else S2”.
#include<stdio.h>
#include<string.h>
char ip[100];   /* the input statement being translated */
int i;          /* index used by main() while scanning ip */

/*
 * Reports whether the input statement has an else part.
 * Returns 1 if the text "else" occurs anywhere in ip, otherwise 0.
 */
int check(void)
{
    return strstr(ip,"else")!=NULL;
}
main()
{
char p[10][10];
char temp[20];
int j=0,k=0,ch;
printf("enter the input\n");
gets(ip);
strcat(ip," ");
ch=check();
for(i=0;i<strlen(ip);i++)
{
if(ip[i]=='('||ip[i]==' '||ip[i]==')')
{
temp[k]='\0';
if(k!=0)
{
strcpy(p[j++],temp);
strcpy(temp,"\0");
k=0;
}
}
else
temp[k++]=ip[i];
}
if(ch)
{
printf("\n output \n");
printf("\n if %s goto L1\n\tgoto L2\n", p[1]);
printf("L1:%s\n\t goto L3\n",p[3]);
printf("L2:%s\n L3:\n",p[5]);
}
else
{
13CSL68 Page 32
SYSTEM PROGRAMMING LAB
output:
output
if a>100 goto L1
goto L2
L1:a=10
L2:
[oslab@localhost 15CS031]$ ./a.out
output
if a>10 goto L1
goto L2
L1:a=0
goto L3
L2:b=0
L3:
13CSL68 Page 33