From 8d5389d66ef79b58a0fff32fa2b01b4206bfb311 Mon Sep 17 00:00:00 2001
From: sanine
Date: Mon, 16 Jan 2023 13:35:04 -0600
Subject: add initial parser

---
 yy/kalmia.l |  64 ++++++++++++++++++++++++
 yy/kalmia.y | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 yy/makefile |  10 ++++
 3 files changed, 234 insertions(+)
 create mode 100644 yy/kalmia.l
 create mode 100644 yy/kalmia.y
 create mode 100644 yy/makefile

(limited to 'yy')

diff --git a/yy/kalmia.l b/yy/kalmia.l
new file mode 100644
index 0000000..2472bdc
--- /dev/null
+++ b/yy/kalmia.l
@@ -0,0 +1,64 @@
+%{
+/* Scanner for kalmia's XML-like input; token codes come from y.tab.h. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "y.tab.h"
+
+char *copy_str(const char *);
+int line_num;
+
+%}
+
+S \x20\x0a\x0d\x09
+DATE [0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z]*
+
+    /* STRING is an exclusive start condition: while inside a quoted
+       attribute value only the <STRING> rules may match, so characters
+       reach the parser as CHAR tokens instead of NAME/number tokens. */
+%x STRING
+
+%%
+
+    /* NOTE(review): the prolog pattern was empty ("") in the source this
+       was recovered from; a standard XML declaration is assumed. */
+"<?xml"[^?]*"?>"  { return PROLOG; }
+
+[\x20\x0a\x0d\x09]  { if (*yytext == '\n') { line_num += 1; } }
+
+"<"   { return S_TAG_OPEN; }
+"</"  { return E_TAG_OPEN; }
+">"   { return TAG_CLOSE; }
+"/>"  { return EMPTY_TAG_CLOSE; }
+
+[a-zA-Z_:][a-zA-Z0-9\.\-_:]*  { yylval.sval = copy_str(yytext); return NAME; }
+
+"="  { return *yytext; }
+
+<INITIAL>"\""  { BEGIN(STRING); return *yytext; }
+<STRING>"\""   { BEGIN(INITIAL); return *yytext; }
+<STRING>[^"]   { yylval.cval = *yytext; return CHAR; }
+
+-?[0-9]+           { yylval.lval = strtol(yytext, NULL, 10); return INTEGER; }
+-?[0-9]+\.?[0-9]*  { yylval.dval = strtod(yytext, NULL); return DOUBLE; }
+{DATE}             { return DATE; }
+
+%%
+
+/*
+ * Return a freshly malloc'd copy of the NUL-terminated string `str`.
+ * The caller owns the copy and must free() it.  Allocation failure is
+ * fatal, so the result is never NULL.
+ */
+char *copy_str(const char *str)
+{
+    size_t len = strlen(str) + 1; /* +1 for the terminating NUL */
+    char *copy = malloc(len);
+
+    if (copy == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(copy, str, len);
+    return copy;
+}
diff --git a/yy/kalmia.y b/yy/kalmia.y
new file mode 100644
index 0000000..5d808e2
--- /dev/null
+++ b/yy/kalmia.y
@@ -0,0 +1,160 @@
+%{
+/* Grammar for kalmia's XML-like input; prints each tag and attribute as it is parsed. */
+#include <stdio.h>
+#include <stdlib.h>
+
+int yyerror(const char *);
+int yylex(void);
+
+extern int line_num;
+
+/* Text of the attribute value currently being read, filled one CHAR at a time. */
+char attr_buf[1024];
+int attr_i;
+
+/* Set once the current attribute value has overflowed attr_buf. */
+static int attr_truncated;
+
+/*
+ * Append `c` to attr_buf, always leaving room for the terminating NUL.
+ * Characters that do not fit are dropped, with one warning per attribute.
+ */
+static void attr_push(char c)
+{
+    if ((size_t)attr_i + 1 < sizeof attr_buf) {
+        attr_buf[attr_i] = c;
+        attr_i += 1;
+    } else if (!attr_truncated) {
+        attr_truncated = 1;
+        fprintf(stderr, "line %d: attribute value too long, truncating\n", line_num);
+    }
+}
+%}
+
+
+%union {
+    long lval;
+    double dval;
+    char *sval;
+    char cval;
+}
+
+%token PROLOG
+%token S_TAG_OPEN E_TAG_OPEN TAG_CLOSE EMPTY_TAG_CLOSE
+%token <sval> NAME
+%token <cval> CHAR
+%token <lval> INTEGER
+%token <dval> DOUBLE
+%token DATE
+
+
+%define parse.error verbose
+
+
+%%
+
+
+document: PROLOG element;
+
+elements:
+    element
+    | elements element
+    ;
+
+element:
+    empty_tag
+    | start_tag end_tag
+    | start_tag content end_tag
+    ;
+
+content:
+    elements
+    | integers
+    | doubles
+    | DATE
+    ;
+
+/* NAME strings are malloc'd by the lexer; each action frees them after printing. */
+empty_tag:
+    S_TAG_OPEN NAME attributes EMPTY_TAG_CLOSE { printf("empty tag: %s\n", $2); free($2); }
+    ;
+
+start_tag:
+    S_TAG_OPEN NAME attributes TAG_CLOSE { printf("enter tag: %s\n", $2); free($2); }
+    ;
+
+end_tag:
+    E_TAG_OPEN NAME TAG_CLOSE { printf("exit tag: %s\n", $2); free($2); }
+    ;
+
+/* Zero or more attributes; left-recursive with a single empty base case. */
+attributes:
+    /* empty */
+    | attributes attribute
+    ;
+
+attribute:
+    NAME '=' '"' chars '"' {
+        attr_buf[attr_i] = 0;
+        printf("attribute: %s=%s\n", $1, attr_buf);
+        free($1);
+        attr_i = 0;
+        attr_truncated = 0;
+    }
+    ;
+
+/* Zero or more characters of an attribute value. */
+chars:
+    /* empty */
+    | chars CHAR { attr_push($2); }
+    ;
+
+
+integers:
+    INTEGER
+    | integers INTEGER
+    ;
+
+doubles:
+    DOUBLE
+    | doubles DOUBLE
+    ;
+
+
+%%
+
+
+extern FILE *yyin;
+
+/*
+ * Parse "in.xml" from the current directory.
+ * Returns EXIT_SUCCESS if it parses cleanly, EXIT_FAILURE if the file
+ * cannot be opened or contains a syntax error.
+ */
+int main(void)
+{
+    int status;
+
+    attr_i = 0;
+    line_num = 1; /* report 1-based line numbers */
+    yyin = fopen("in.xml", "r");
+    if (yyin == NULL) {
+        fprintf(stderr, "could not open file!\n");
+        return EXIT_FAILURE;
+    }
+
+    /* One yyparse() call consumes the whole document; re-entering it after
+       a syntax error would only resume mid-file and cascade errors. */
+    status = yyparse();
+    fclose(yyin);
+
+    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+
+/* Error callback invoked by yyparse(): report `msg` with the current line number. */
+int yyerror(const char *msg)
+{
+    fprintf(stderr, "parse error on line %d: %s\n", line_num, msg);
+    return 1;
+}
diff --git a/yy/makefile b/yy/makefile
new file mode 100644
index 0000000..cd482dc
--- /dev/null
+++ b/yy/makefile
@@ -0,0 +1,10 @@
+all: kalmia
+
+y.tab.c: kalmia.y
+	yacc -d kalmia.y
+
+lex.yy.c: kalmia.l
+	lex kalmia.l
+
+kalmia: y.tab.c lex.yy.c
+	gcc -o kalmia y.tab.c lex.yy.c -lfl
-- 
cgit v1.2.1