From 8e21b8f52a5c566767df9b3fe105706b6888a89b Mon Sep 17 00:00:00 2001 From: sanine Date: Tue, 31 Jan 2023 13:45:50 -0600 Subject: parse xml into memory --- yy/bad.xml | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ yy/kalmia.l | 1 - yy/kalmia.y | 101 ++++++++++++++++++++++++++++++++++++++++++---- yy/main.c | 42 ++++++++++++++++++++ yy/simple.xml | 8 ++++ 5 files changed, 270 insertions(+), 8 deletions(-) create mode 100644 yy/bad.xml create mode 100644 yy/simple.xml diff --git a/yy/bad.xml b/yy/bad.xml new file mode 100644 index 0000000..873797b --- /dev/null +++ b/yy/bad.xml @@ -0,0 +1,126 @@ + + + + 2005-11-14T02:16:38Z + 2005-11-15T11:36:38Z + 1.0 + + + + + + + + 1.0 1.0 1.0 1.0 + + + 1.0 1.0 1.0 1.0 + + + 1.0 1.0 1.0 1.0 + + + 1.0 1.0 1.0 1.0 + + + 20.0 + + + 1.0 1.0 1.0 1.0 + + + 0.5 + + + 1.0 1.0 1.0 1.0 + + + 1.0 + + + + + + + + + + + + + + + + + -0.5 0.5 0.5 + 0.5 0.5 0.5 + -0.5 -0.5 0.5 + 0.5 -0.5 0.5 + -0.5 0.5 -0.5 + 0.5 0.5 -0.5 + -0.5 -0.5 -0.5 + 0.5 -0.5 -0.5 + + + + + + + + + + + + 1.0 0.0 0.0 + -1.0 0.0 0.0 + 0.0 1.0 0.0 + 0.0 -1.0 0.0 + 0.0 0.0 1.0 + 0.0 0.0 -1.0 + + + + + + + + + + + + + + + +

0 4 2 4 3 4 1 4

+

0 2 1 2 5 2 4 2

+

6 3 7 3 3 3 2 3

+

0 1 4 1 6 1 2 1

+

3 0 7 0 5 0 1 0

+

5 5 7 5 6 5 4 5

+
+
+
+
+ + + + 0 0 0 + 0 0 1 0 + 0 1 0 0 + 1 0 0 0 + 1 1 1 + + + + + + + + + + + + + +
diff --git a/yy/kalmia.l b/yy/kalmia.l index ef7c200..85f68e3 100644 --- a/yy/kalmia.l +++ b/yy/kalmia.l @@ -21,7 +21,6 @@ %} S \x20\x0a\x0d\x09 -DATE [0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z]* ATTR ([\x20\x0a\x0d\x09]*)?"=" diff --git a/yy/kalmia.y b/yy/kalmia.y index 98650f8..56b15ac 100644 --- a/yy/kalmia.y +++ b/yy/kalmia.y @@ -13,8 +13,7 @@ typedef void* yyscan_t; struct kalmia_t { - void *current; - int index; + struct kai_tag_t *tag; }; struct kai_attr_t { @@ -39,9 +38,11 @@ int yyerror(YYLTYPE *yyllocp, yyscan_t unused, struct kalmia_t *unused2, const char *msg); struct kai_attr_t * kai_attr_new(char *key, char *value); + struct kai_attr_t * kai_attr_last(struct kai_attr_t *head); void kai_attr_destroy(struct kai_attr_t *attr); - struct kai_tag_t * kai_tag_new(char *type, struct kai_attr_t *attrs, struct kai_tag_t *children, char *content); + struct kai_tag_t * kai_tag_new(char *type, struct kai_attr_t *attrs); + struct kai_tag_t * kai_tag_last(struct kai_tag_t *head); void kai_tag_destroy(struct kai_tag_t *tag); } @@ -58,11 +59,79 @@ %token TEXT %token CONTENT +%type start_tag +%type empty_tag +%type tag +%type tags + +%type attribute +%type attributes + %% -document: PROLOG +document: PROLOG tag { result->tag = $2; } + + +tags: + tag { $$ = $1; } + | tags tag { $$ = $1; kai_tag_last($$)->next = $2; } + ; + + +tag: + start_tag end_tag { $$ = $1; } + | start_tag CONTENT end_tag { $$ = $1; $$->content = $2; } + | start_tag tags end_tag { $$ = $1; $$->children = $2; } + | empty_tag { $$ = $1; } + ; + +start_tag: + S_TAG_OPEN NAME TAG_CLOSE + { + $$ = kai_tag_new($2, NULL); + if ($$ == NULL) YYNOMEM; + } + | S_TAG_OPEN NAME attributes TAG_CLOSE + { + $$ = kai_tag_new($2, $3); + if ($$ == NULL) YYNOMEM; + } + ; + + +empty_tag: + S_TAG_OPEN NAME EMPTY_TAG_CLOSE + { + $$ = kai_tag_new($2, NULL); + if ($$ == NULL) YYNOMEM; + } + | S_TAG_OPEN NAME attributes EMPTY_TAG_CLOSE + { + $$ = kai_tag_new($2, $3); + if ($$ == NULL) YYNOMEM; + } + ; + + +end_tag: + E_TAG_OPEN NAME TAG_CLOSE + ; + + +attributes: + attribute { $$ = $1; } + | attributes attribute { $$ = $1; kai_attr_last($$)->next = $2; } + ; + +attribute: + ATTR '=' '"' TEXT '"' + { + $$ = kai_attr_new($1, $4); + if ($$ == NULL) YYNOMEM; + } + ; %% @@ -94,6 +163,15 @@ struct kai_attr_t * kai_attr_new(char *key, char *value) return attr; } +struct kai_attr_t * kai_attr_last(struct kai_attr_t *head) +{ + struct kai_attr_t *ptr = head; + while (ptr->next != NULL) { + ptr = ptr->next; + } + return ptr; +} + void kai_attr_destroy(struct kai_attr_t *attr) { if (attr == NULL) { @@ -107,7 +185,7 @@ void kai_attr_destroy(struct kai_attr_t *attr) } -struct kai_tag_t * kai_tag_new(char *type, struct kai_attr_t *attrs, struct kai_tag_t *children, char *content) +struct kai_tag_t * kai_tag_new(char *type, struct kai_attr_t *attrs) { struct kai_tag_t *tag = malloc(sizeof(struct kai_tag_t)); if (tag == NULL) { @@ -116,12 +194,21 @@ struct kai_tag_t * kai_tag_new(char *type, struct kai_attr_t *attrs, struct kai_ tag->type = type; tag->attrs = attrs; - tag->children = children; - tag->content = content; + tag->children = NULL; + tag->content = NULL; tag->next = NULL; return tag; } +struct kai_tag_t * kai_tag_last(struct kai_tag_t *head) +{ + struct kai_tag_t *ptr = head; + while (ptr->next != NULL) { + ptr = ptr->next; + } + return ptr; +} + void kai_tag_destroy(struct kai_tag_t *tag) { if (tag == NULL) { diff --git a/yy/main.c b/yy/main.c index 0d97fba..823db5a 100644 --- a/yy/main.c +++ b/yy/main.c @@ -6,6 +6,44 @@ #include "kalmia.lex.h" +void print_attrs(struct kai_attr_t *attr) +{ + while(attr != NULL) { + printf("%s=\"%s\" ", attr->key, attr->value); + attr = attr->next; + } +} + + +void print_tag(char *indent, struct kai_tag_t *tag) +{ + printf("%s%s[ ", indent, tag->type); + print_attrs(tag->attrs); + printf("]\n"); +} + + +void print_level(int indent_level, struct kai_tag_t *tag) +{ + char indent[64]; + for (int i=0; ichildren != NULL) { + print_level(indent_level+1, tag->children); + } + if (tag->content != NULL) { + printf("%s\t%s\n", indent, tag->content); + } + tag = tag->next; + } +} + + int main(int argc, char **argv) { if (argc < 2) { @@ -25,5 +63,9 @@ int main(int argc, char **argv) kalmiaset_in(in, scanner); kalmiaparse(scanner, &result); kalmialex_destroy(scanner); + + print_tag("", result.tag); + print_level(1, result.tag->children); + return 0; } diff --git a/yy/simple.xml b/yy/simple.xml new file mode 100644 index 0000000..3791d0e --- /dev/null +++ b/yy/simple.xml @@ -0,0 +1,8 @@ + + + + 2005-11-14T02:16:38Z + 2005-11-15T11:36:38Z + 1.0 + + -- cgit v1.2.1