summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author sanine <sanine.not@pm.me> 2023-01-16 13:35:04 -0600
committer sanine <sanine.not@pm.me> 2023-01-16 13:35:04 -0600
commit 8d5389d66ef79b58a0fff32fa2b01b4206bfb311 (patch)
tree 02bd7a8c80d5c4897a6e29d5efcccc9ea01082e4
parent e7073c34d2ca92663d98bbb1912ad5f6e615e87f (diff)
add initial parser
-rw-r--r-- yy/kalmia.l 52
-rw-r--r-- yy/kalmia.y 118
-rw-r--r-- yy/makefile 10
3 files changed, 180 insertions, 0 deletions
diff --git a/yy/kalmia.l b/yy/kalmia.l
new file mode 100644
index 0000000..2472bdc
--- /dev/null
+++ b/yy/kalmia.l
@@ -0,0 +1,52 @@
+%{
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "y.tab.h"
+
+/* Returns a heap-allocated duplicate of a NUL-terminated string; defined after the rules section. */
+char *copy_str(char *);
+/* Newline counter: bumped in the rules section when a matched character is '\n';
+ * kalmia.y declares it extern and prints it in yyerror. */
+int line_num;
+
+%}
+
+/* S: any single whitespace character (space, LF, CR or TAB). Written as a
+ * character class so that {S} matches one character, not the four in a row. */
+S [\x20\x0a\x0d\x09]
+/* DATE: timestamp of the form YYYY-MM-DDThh:mm:ss followed by optional capital letters. */
+DATE [0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z]*
+
+
+
+
+
+%start STRING
+
+%%
+
+ /* XML declaration; it has to sit on one line because . never matches a newline. */
+<INITIAL>"<?xml".*"?>" { return PROLOG; }
+
+ /* Whitespace between tokens is dropped; newlines are counted for error messages. */
+<INITIAL>[\x20\x0a\x0d\x09] { if (*yytext == '\n') { line_num += 1; } }
+
+
+ /* Tag delimiters. */
+<INITIAL>"<" { return S_TAG_OPEN; }
+<INITIAL>"</" { return E_TAG_OPEN; }
+<INITIAL>">" { return TAG_CLOSE; }
+<INITIAL>"/>" { return EMPTY_TAG_CLOSE; }
+
+
+ /* Tag and attribute names; the parser receives a heap copy made by copy_str. */
+<INITIAL>[a-zA-Z_:][a-zA-Z0-9\.\-_:]* { yylval.sval = copy_str(yytext); return NAME; }
+
+<INITIAL>"=" { return *yytext; }
+
+ /* Quoted strings: the opening quote enters STRING, the closing quote leaves it,
+    and every character in between is returned as its own CHAR token. Newlines
+    inside a string are counted too, so line numbers stay correct afterwards. */
+<INITIAL>"\"" { BEGIN(STRING); return *yytext; }
+<STRING>"\"" { BEGIN(INITIAL); return *yytext; }
+<STRING>[^"] { if (*yytext == '\n') { line_num += 1; } yylval.cval = *yytext; return CHAR; }
+
+ /* Numbers and dates. INTEGER is listed before DOUBLE so that it wins when both
+    match the same text; DATE wins over both because its match is longer. */
+<INITIAL>-?[0-9]+ { yylval.lval = strtol(yytext, NULL, 10); return INTEGER; }
+<INITIAL>-?[0-9]+\.?[0-9]* { yylval.dval = strtod(yytext, NULL); return DOUBLE; }
+<INITIAL>{DATE} { return DATE; }
+
+%%
+
+/*
+ * Duplicate a NUL-terminated string on the heap.
+ *
+ * str: the string to copy (the NAME rule passes yytext).
+ * Returns a newly malloc'd copy that the caller is responsible for freeing.
+ * The program exits with a message if the allocation fails, so the result
+ * is never NULL.
+ */
+char * copy_str(char *str)
+{
+    size_t len = strlen(str) + 1; /* +1 for the terminating NUL */
+    char *copy = malloc(len);
+
+    if (copy == NULL) {
+        fprintf(stderr, "copy_str: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(copy, str, len);
+    return copy;
+}
diff --git a/yy/kalmia.y b/yy/kalmia.y
new file mode 100644
index 0000000..5d808e2
--- /dev/null
+++ b/yy/kalmia.y
@@ -0,0 +1,118 @@
+%{
+#include <stdio.h>
+/* Error callback and scanner entry point; both are called from the generated parser. */
+int yyerror(const char *);
+int yylex();
+
+/* Scratch buffer in which the grammar actions collect the characters of one attribute value. */
+char attr_buf[1024];
+/* Index of the next free slot in attr_buf; set back to 0 after each attribute is printed. */
+int attr_i;
+%}
+
+
+/* Semantic value of a token; the <...> tag on each %token picks the member in use. */
+%union {
+ long lval; /* INTEGER: value produced by strtol in the scanner */
+ double dval; /* DOUBLE: value produced by strtod in the scanner */
+ char *sval; /* NAME: heap copy of the matched text */
+ char cval; /* CHAR: one character from inside a quoted string */
+}
+
+/* Tokens returned by the scanner in kalmia.l. '=' and '"' are returned as
+ * plain character tokens and therefore need no declaration here. */
+%token PROLOG
+%token S_TAG_OPEN E_TAG_OPEN TAG_CLOSE EMPTY_TAG_CLOSE
+%token <sval> NAME
+%token <cval> CHAR
+%token <lval> INTEGER
+%token <dval> DOUBLE
+%token DATE
+
+
+%define parse.error verbose
+
+
+%%
+
+
+/* Start symbol: the XML declaration followed by exactly one root element. */
+document: PROLOG element;
+
+/* One or more elements in sequence (left-recursive list). */
+elements:
+ element
+ | elements element
+ ;
+
+/* An element is a self-closing tag, a start/end pair with nothing between
+ * them, or a start/end pair wrapping some content. */
+element:
+ empty_tag
+ | start_tag end_tag
+ | start_tag content end_tag
+ ;
+
+/* Content is one kind of thing only: child elements, a run of integers,
+ * a run of doubles, or a single DATE. Mixed content is not accepted. */
+content:
+ elements
+ | integers
+ | doubles
+ | DATE
+ ;
+
+/* The three tag forms. Each action only prints the tag name.
+ * NOTE(review): $2 is the string allocated by copy_str in kalmia.l and no
+ * action here frees it.
+ * NOTE(review): the name in end_tag is not compared with the name of the
+ * start_tag it closes. */
+
+/* Self-closing tag: '<' NAME attributes '/>' */
+empty_tag:
+ S_TAG_OPEN NAME attributes EMPTY_TAG_CLOSE { printf("empty tag: %s\n", $2); }
+ ;
+
+/* Opening tag: '<' NAME attributes '>' */
+start_tag:
+ S_TAG_OPEN NAME attributes TAG_CLOSE { printf("enter tag: %s\n", $2); }
+ ;
+
+/* Closing tag: '</' NAME '>' */
+end_tag:
+ E_TAG_OPEN NAME TAG_CLOSE { printf("exit tag: %s\n", $2); }
+ ;
+
+/* Zero or more attributes. A single left-recursive rule with an empty base
+ * case; a separate one-attribute alternative would duplicate "empty followed
+ * by attribute" and make the grammar ambiguous. */
+attributes:
+ /* empty */
+ | attributes attribute
+ ;
+
+/* NAME '=' '"' chars '"' : by the time this action runs, the chars actions
+ * have filled attr_buf with the value. Terminate it, print it, and reset the
+ * index for the next attribute. */
+attribute:
+ NAME '=' '"' chars '"' {
+     /* chars never uses the last slot, so there is always room for the NUL */
+     attr_buf[attr_i] = 0;
+     printf("attribute: %s=%s\n", $1, attr_buf);
+     attr_i = 0;
+ }
+ ;
+
+/* Zero or more string characters, appended to attr_buf one at a time.
+ * Characters that would not fit (leaving one byte for the terminator) are
+ * dropped, so an over-long value is truncated instead of overflowing. */
+chars:
+ /* empty */
+ | chars CHAR {
+     if (attr_i < (int) sizeof(attr_buf) - 1) {
+         attr_buf[attr_i] = $2;
+         attr_i += 1;
+     }
+ }
+ ;
+
+
+/* One or more INTEGER tokens; no action is attached, so the values are discarded. */
+integers:
+ INTEGER
+ | integers INTEGER
+ ;
+
+/* One or more DOUBLE tokens; no action is attached, so the values are discarded. */
+doubles:
+ DOUBLE
+ | doubles DOUBLE
+ ;
+
+
+%%
+
+
+extern FILE *yyin;
+extern int line_num;
+
+/*
+ * Entry point: parse the file "in.xml" in the current directory.
+ *
+ * Returns -1 if the file cannot be opened, the last non-zero yyparse()
+ * result if any parse failed, and 0 if the input parsed cleanly.
+ */
+int main(void)
+{
+    int status = 0;
+
+    attr_i = 0;
+    line_num = 1; /* the scanner adds one per newline, so start at line 1 */
+    yyin = fopen("in.xml", "r");
+    if (yyin == NULL) {
+        fprintf(stderr, "could not open file!\n");
+        return -1;
+    }
+    do {
+        int rc = yyparse();
+        if (rc != 0) {
+            status = rc; /* remember the failure for the exit status */
+        }
+    } while (!feof(yyin));
+
+    fclose(yyin);
+    return status;
+}
+
+
+/*
+ * Error callback invoked by the generated parser.
+ *
+ * msg: description of the problem.
+ * Writes the message to stderr, prefixed with the current value of line_num.
+ * Always returns 1.
+ */
+int yyerror(const char *msg)
+{
+    const int reported = 1;
+
+    fprintf(stderr, "parse error on line %d: %s\n", line_num, msg);
+    return reported;
+}
diff --git a/yy/makefile b/yy/makefile
new file mode 100644
index 0000000..cd482dc
--- /dev/null
+++ b/yy/makefile
@@ -0,0 +1,10 @@
+# Build the kalmia parser from its lex/yacc sources.
+# Recipe lines must begin with a tab character.
+.PHONY: all
+all: kalmia
+
+# yacc -d writes y.tab.c and also y.tab.h, which kalmia.l includes.
+y.tab.c: kalmia.y
+	yacc -d kalmia.y
+
+# lex writes the scanner to lex.yy.c.
+lex.yy.c: kalmia.l
+	lex kalmia.l
+
+# Compile and link both generated sources against the flex support library.
+kalmia: y.tab.c lex.yy.c
+	gcc -o kalmia y.tab.c lex.yy.c -lfl