diff options
author | sanine <sanine.not@pm.me> | 2023-01-16 13:35:04 -0600 |
---|---|---|
committer | sanine <sanine.not@pm.me> | 2023-01-16 13:35:04 -0600 |
commit | 8d5389d66ef79b58a0fff32fa2b01b4206bfb311 (patch) | |
tree | 02bd7a8c80d5c4897a6e29d5efcccc9ea01082e4 /yy | |
parent | e7073c34d2ca92663d98bbb1912ad5f6e615e87f (diff) |
add initial parser
Diffstat (limited to 'yy')
-rw-r--r-- | yy/kalmia.l | 52 | ||||
-rw-r--r-- | yy/kalmia.y | 118 | ||||
-rw-r--r-- | yy/makefile | 10 |
3 files changed, 180 insertions, 0 deletions
diff --git a/yy/kalmia.l b/yy/kalmia.l
new file mode 100644
index 0000000..2472bdc
--- /dev/null
+++ b/yy/kalmia.l
@@ -0,0 +1,65 @@
+%{
+/* kalmia.l -- lex scanner producing the tokens consumed by the grammar in kalmia.y. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "y.tab.h"
+
+char *copy_str(const char *);
+
+/* Current input line, numbered from 1; read by yyerror() in kalmia.y. */
+int line_num = 1;
+%}
+
+S    [\x20\x0a\x0d\x09]
+DATE [0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z]*
+
+%start STRING
+
+%%
+
+<INITIAL>"<?xml".*"?>" { return PROLOG; }
+
+ /* Whitespace between tokens is dropped; a newline only advances the line counter. */
+<INITIAL>{S} { if (*yytext == '\n') { line_num += 1; } }
+
+<INITIAL>"<"  { return S_TAG_OPEN; }
+<INITIAL>"</" { return E_TAG_OPEN; }
+<INITIAL>">"  { return TAG_CLOSE; }
+<INITIAL>"/>" { return EMPTY_TAG_CLOSE; }
+
+ /* NAME carries a heap copy of the matched text; the parser action that consumes it frees it. */
+<INITIAL>[a-zA-Z_:][a-zA-Z0-9\.\-_:]* { yylval.sval = copy_str(yytext); return NAME; }
+
+<INITIAL>"=" { return *yytext; }
+
+ /* A double quote toggles the STRING state, in which every other character is returned as a CHAR. */
+<INITIAL>"\"" { BEGIN(STRING); return *yytext; }
+<STRING>"\""  { BEGIN(INITIAL); return *yytext; }
+<STRING>\n    { line_num += 1; yylval.cval = *yytext; return CHAR; }
+<STRING>[^"]  { yylval.cval = *yytext; return CHAR; }
+
+ /* The longest match wins, so 1.5 is a DOUBLE and a full timestamp a DATE; on a tie the earlier rule wins. */
+<INITIAL>-?[0-9]+ { yylval.lval = strtol(yytext, NULL, 10); return INTEGER; }
+<INITIAL>-?[0-9]+\.?[0-9]* { yylval.dval = strtod(yytext, NULL); return DOUBLE; }
+<INITIAL>{DATE} { return DATE; }
+
+%%
+
+/*
+ * Return a freshly malloc'd copy of the NUL-terminated string str.
+ * The caller owns the result and must free() it. Allocation failure is
+ * fatal: a message is printed to stderr and the process exits.
+ */
+char *copy_str(const char *str)
+{
+    size_t len = strlen(str) + 1; /* include the terminating NUL */
+    char *copy = malloc(len);
+
+    if (copy == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(copy, str, len);
+    return copy;
+}
diff --git a/yy/kalmia.y b/yy/kalmia.y
new file mode 100644
index 0000000..5d808e2
--- /dev/null
+++ b/yy/kalmia.y
@@ -0,0 +1,146 @@
+%{
+/* kalmia.y -- yacc grammar for a small XML subset; prints every tag and attribute it parses. */
+#include <stdio.h>
+#include <stdlib.h>
+
+int yyerror(const char *);
+int yylex(void);
+
+/* Value of the attribute currently being parsed: attr_i characters, not yet NUL-terminated. */
+char attr_buf[1024];
+int attr_i;
+
+static int attr_push(char c);
+%}
+
+%union {
+    long lval;
+    double dval;
+    char *sval; /* heap string from copy_str() in kalmia.l; freed by the action that consumes it */
+    char cval;
+}
+
+%token PROLOG
+%token S_TAG_OPEN E_TAG_OPEN TAG_CLOSE EMPTY_TAG_CLOSE
+%token <sval> NAME
+%token <cval> CHAR
+%token <lval> INTEGER
+%token <dval> DOUBLE
+%token DATE
+
+%define parse.error verbose
+
+%%
+
+document: PROLOG element;
+
+elements:
+    element
+    | elements element
+    ;
+
+element:
+    empty_tag
+    | start_tag end_tag
+    | start_tag content end_tag
+    ;
+
+content:
+    elements
+    | integers
+    | doubles
+    | DATE
+    ;
+
+empty_tag:
+    S_TAG_OPEN NAME attributes EMPTY_TAG_CLOSE { printf("empty tag: %s\n", $2); free($2); }
+    ;
+
+start_tag:
+    S_TAG_OPEN NAME attributes TAG_CLOSE { printf("enter tag: %s\n", $2); free($2); }
+    ;
+
+end_tag:
+    E_TAG_OPEN NAME TAG_CLOSE { printf("exit tag: %s\n", $2); free($2); }
+    ;
+
+/* Zero or more attributes. Only the left-recursive form is listed so each list has a single derivation. */
+attributes:
+    /* empty */
+    | attributes attribute
+    ;
+
+attribute:
+    NAME '=' '"' chars '"' {
+        attr_buf[attr_i] = 0;
+        printf("attribute: %s=%s\n", $1, attr_buf);
+        attr_i = 0; /* reset for the next attribute */
+        free($1);
+    }
+    ;
+
+/* Zero or more CHARs, collected into attr_buf; the parse is aborted if the value does not fit. */
+chars:
+    /* empty */
+    | chars CHAR { if (attr_push($2) != 0) { yyerror("attribute value too long"); YYABORT; } }
+    ;
+
+integers:
+    INTEGER
+    | integers INTEGER
+    ;
+
+doubles:
+    DOUBLE
+    | doubles DOUBLE
+    ;
+
+%%
+
+extern FILE *yyin;
+extern int line_num;
+
+/*
+ * Append c to attr_buf, keeping the last byte free for the NUL that the
+ * attribute action writes. Returns 0 on success, -1 if the buffer is full.
+ */
+static int attr_push(char c)
+{
+    if (attr_i >= (int)sizeof attr_buf - 1) {
+        return -1;
+    }
+    attr_buf[attr_i] = c;
+    attr_i += 1;
+    return 0;
+}
+
+/*
+ * Parse the file "in.xml" in the current directory.
+ * Returns EXIT_SUCCESS if it parses cleanly, EXIT_FAILURE if it cannot be
+ * opened or yyparse() reports an error.
+ */
+int main(void)
+{
+    int status;
+
+    attr_i = 0;
+    line_num = 1; /* error messages count lines from 1 */
+    yyin = fopen("in.xml", "r");
+    if (yyin == NULL) {
+        fprintf(stderr, "could not open file!\n");
+        return EXIT_FAILURE;
+    }
+
+    /* The start symbol must be followed by end of input, so one call covers the whole file. */
+    status = yyparse();
+    fclose(yyin);
+
+    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+/* Print msg to stderr prefixed with the scanner's current line number. Always returns 1. */
+int yyerror(const char *msg)
+{
+    fprintf(stderr, "parse error on line %d: %s\n", line_num, msg);
+    return 1;
+}
diff --git a/yy/makefile b/yy/makefile
new file mode 100644
index 0000000..cd482dc
--- /dev/null
+++ b/yy/makefile
@@ -0,0 +1,12 @@
+# Builds the kalmia parser. "yacc -d" also writes y.tab.h, which kalmia.l includes.
+.PHONY: all
+all: kalmia
+
+y.tab.c: kalmia.y
+	yacc -d kalmia.y
+
+lex.yy.c: kalmia.l
+	lex kalmia.l
+
+kalmia: y.tab.c lex.yy.c
+	gcc -o kalmia y.tab.c lex.yy.c -lfl