|
|
#include "unity/unity.h" |
|
|
#include <libxml/HTMLparser.h> |
|
|
|
|
|
#include <stdlib.h> |
|
|
#include <string.h> |
|
|
#include <stdio.h> |
|
|
|
|
|
|
|
|
/*
 * Entry point exercised by every test in this file. It is only declared here;
 * the definition lives in another translation unit.
 * NOTE(review): presumably a test-only wrapper around the parser's internal
 * start-tag routine -- confirm against the library build.
 */
extern void test_htmlParseStartTag(htmlParserCtxtPtr ctxt);
|
|
|
|
|
|
|
|
typedef struct { |
|
|
xmlChar *name; |
|
|
int att_count; |
|
|
xmlChar **atts; |
|
|
} StartEvent; |
|
|
|
|
|
typedef struct { |
|
|
int nevents; |
|
|
StartEvent events[64]; |
|
|
} SAXCapture; |
|
|
|
|
|
/*
 * Put a capture into its empty state: zero events, every name/atts pointer
 * cleared. Must be called before the capture is handed to the parser.
 */
static void capture_init(SAXCapture *cap)
{
    memset(cap, 0, sizeof *cap);
}
|
|
|
|
|
/*
 * Release every string and array owned by a capture, then reset it to the
 * empty state so it can be reused or freed again safely.
 *
 * Attribute slots are walked by count (2 * att_count entries) instead of
 * scanning for a NULL sentinel: the capturing callback stores NULL in the
 * value slot of an attribute that has no value, so a sentinel scan would stop
 * there and leak every string stored after it.
 */
static void capture_free(SAXCapture *cap) {
    if (cap == NULL) {
        return;
    }

    for (int i = 0; i < cap->nevents; i++) {
        StartEvent *ev = &cap->events[i];

        if (ev->name != NULL) {
            xmlFree(ev->name);
        }

        if (ev->atts != NULL) {
            const int slots = 2 * ev->att_count;

            for (int j = 0; j < slots; j++) {
                /* Value slots may legitimately be NULL; skip those. */
                if (ev->atts[j] != NULL) {
                    xmlFree(ev->atts[j]);
                }
            }
            /* The array itself came from calloc(), not the libxml2 allocator. */
            free(ev->atts);
        }
    }

    memset(cap, 0, sizeof(*cap));
}
|
|
|
|
|
static void test_sax_startElement(void *ctx, const xmlChar *name, const xmlChar **atts) { |
|
|
SAXCapture *cap = (SAXCapture *)ctx; |
|
|
if (cap->nevents >= (int)(sizeof(cap->events)/sizeof(cap->events[0]))) |
|
|
return; |
|
|
StartEvent *ev = &cap->events[cap->nevents++]; |
|
|
ev->name = xmlStrdup(name); |
|
|
|
|
|
int count = 0; |
|
|
if (atts != NULL) { |
|
|
const xmlChar **p = atts; |
|
|
while (*p != NULL) { |
|
|
|
|
|
p++; |
|
|
|
|
|
if (*p == NULL) break; |
|
|
p++; |
|
|
count++; |
|
|
} |
|
|
} |
|
|
ev->att_count = count; |
|
|
|
|
|
if (atts != NULL && count > 0) { |
|
|
|
|
|
ev->atts = (xmlChar **)calloc((size_t)(2 * count + 1), sizeof(xmlChar *)); |
|
|
int idx = 0; |
|
|
for (int i = 0; i < count; i++) { |
|
|
const xmlChar *aname = atts[2*i]; |
|
|
const xmlChar *aval = atts[2*i + 1]; |
|
|
ev->atts[idx++] = xmlStrdup(aname); |
|
|
ev->atts[idx++] = (aval != NULL) ? xmlStrdup(aval) : NULL; |
|
|
} |
|
|
ev->atts[idx] = NULL; |
|
|
} else { |
|
|
ev->atts = (xmlChar **)calloc(1, sizeof(xmlChar *)); |
|
|
ev->atts[0] = NULL; |
|
|
} |
|
|
} |
|
|
|
|
|
static const xmlChar* find_attr_value(const StartEvent *ev, const char *name) { |
|
|
if (ev->atts == NULL) return NULL; |
|
|
for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) { |
|
|
if (xmlStrcasecmp(ev->atts[i], (const xmlChar *)name) == 0) { |
|
|
return ev->atts[i+1]; |
|
|
} |
|
|
} |
|
|
return NULL; |
|
|
} |
|
|
|
|
|
|
|
|
/*
 * Build an HTML parser context over an in-memory string whose only SAX
 * callback is test_sax_startElement, reporting into `cap`.
 *
 * buf    NUL-terminated markup to parse.
 * flags  parser option bits OR-ed into ctxt->options.
 * cap    capture that receives the startElement events.
 *
 * The caller owns the returned context and releases it with
 * htmlFreeParserCtxt().
 *
 * The handler that the context already owns is reconfigured in place. Pointing
 * ctxt->sax at a static struct instead would leak the context's own handler
 * and, because libxml2 frees ctxt->sax together with the context, would pass
 * a non-heap pointer to the allocator on teardown.
 */
static htmlParserCtxtPtr make_ctxt(const char *buf, int flags, SAXCapture *cap) {
    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(buf, (int)strlen(buf));
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context");

    if (ctxt->sax == NULL) {
        htmlFreeParserCtxt(ctxt);
        TEST_FAIL_MESSAGE("HTML parser context has no SAX handler");
    }

    /* Drop every default callback so only start tags are observed. */
    memset(ctxt->sax, 0, sizeof(*ctxt->sax));
    ctxt->sax->startElement = test_sax_startElement;

    ctxt->userData = cap;
    ctxt->options |= flags;
    return ctxt;
}
|
|
|
|
|
/* Unity fixture hook: initialise the libxml2 parser library. */
void setUp(void)
{
    xmlInitParser();
}
|
|
|
|
|
/* Unity fixture hook: intentionally empty, each test releases its own resources. */
void tearDown(void)
{
}
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_simple_div_noimplied(void) { |
|
|
const char *src = "<div>"; |
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
|
|
TEST_ASSERT_NOT_NULL(cap.events[0].name); |
|
|
TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
|
|
TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_uppercase_and_attr_lowercased(void) { |
|
|
const char *src = "<DIV CLASS=AbC ID=42>"; |
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
|
|
TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
|
|
TEST_ASSERT_TRUE(cap.events[0].att_count >= 2); |
|
|
|
|
|
|
|
|
const xmlChar *vclass = find_attr_value(&cap.events[0], "class"); |
|
|
const xmlChar *vid = find_attr_value(&cap.events[0], "id"); |
|
|
TEST_ASSERT_NOT_NULL(vclass); |
|
|
TEST_ASSERT_NOT_NULL(vid); |
|
|
TEST_ASSERT_EQUAL_STRING("AbC", (const char *)vclass); |
|
|
TEST_ASSERT_EQUAL_STRING("42", (const char *)vid); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_duplicate_attributes_dedup(void) { |
|
|
const char *src = "<div class='a' CLASS=\"b\" class=c>"; |
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
|
|
|
|
|
const StartEvent *ev = &cap.events[0]; |
|
|
|
|
|
int class_count = 0; |
|
|
for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) { |
|
|
if (xmlStrcasecmp(ev->atts[i], BAD_CAST "class") == 0) |
|
|
class_count++; |
|
|
} |
|
|
TEST_ASSERT_EQUAL_INT(1, class_count); |
|
|
const xmlChar *v = find_attr_value(ev, "class"); |
|
|
TEST_ASSERT_NOT_NULL(v); |
|
|
TEST_ASSERT_EQUAL_STRING("a", (const char *)v); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_unexpected_solidus_ignored(void) { |
|
|
const char *src = "<div / id='x'>"; |
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
|
|
TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
|
|
const xmlChar *vx = find_attr_value(&cap.events[0], "id"); |
|
|
TEST_ASSERT_NOT_NULL(vx); |
|
|
TEST_ASSERT_EQUAL_STRING("x", (const char *)vx); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_self_closing(void) { |
|
|
const char *src = "<br/>"; |
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
|
|
TEST_ASSERT_EQUAL_STRING("br", (const char *)cap.events[0].name); |
|
|
TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_incomplete_tag_discarded(void) { |
|
|
const char *src = "<div id='x'"; |
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(0, cap.nevents); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
|
|
|
void test_htmlParseStartTag_many_attributes(void) { |
|
|
|
|
|
char buf[4096]; |
|
|
strcpy(buf, "<span"); |
|
|
const int N = 20; |
|
|
char tmp[64]; |
|
|
for (int i = 0; i < N; i++) { |
|
|
snprintf(tmp, sizeof(tmp), " a%d='v%d'", i, i); |
|
|
strcat(buf, tmp); |
|
|
} |
|
|
strcat(buf, ">"); |
|
|
|
|
|
SAXCapture cap; capture_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt(buf, HTML_PARSE_NOIMPLIED, &cap); |
|
|
|
|
|
test_htmlParseStartTag(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
|
|
TEST_ASSERT_EQUAL_STRING("span", (const char *)cap.events[0].name); |
|
|
TEST_ASSERT_EQUAL_INT(N, cap.events[0].att_count); |
|
|
|
|
|
|
|
|
const StartEvent *ev = &cap.events[0]; |
|
|
const xmlChar *v0 = find_attr_value(ev, "a0"); |
|
|
const xmlChar *v7 = find_attr_value(ev, "a7"); |
|
|
const xmlChar *v19 = find_attr_value(ev, "a19"); |
|
|
TEST_ASSERT_NOT_NULL(v0); |
|
|
TEST_ASSERT_NOT_NULL(v7); |
|
|
TEST_ASSERT_NOT_NULL(v19); |
|
|
TEST_ASSERT_EQUAL_STRING("v0", (const char *)v0); |
|
|
TEST_ASSERT_EQUAL_STRING("v7", (const char *)v7); |
|
|
TEST_ASSERT_EQUAL_STRING("v19", (const char *)v19); |
|
|
|
|
|
capture_free(&cap); |
|
|
htmlFreeParserCtxt(ctxt); |
|
|
} |
|
|
|
|
|
int main(void) { |
|
|
UNITY_BEGIN(); |
|
|
|
|
|
RUN_TEST(test_htmlParseStartTag_simple_div_noimplied); |
|
|
RUN_TEST(test_htmlParseStartTag_uppercase_and_attr_lowercased); |
|
|
RUN_TEST(test_htmlParseStartTag_duplicate_attributes_dedup); |
|
|
RUN_TEST(test_htmlParseStartTag_unexpected_solidus_ignored); |
|
|
RUN_TEST(test_htmlParseStartTag_self_closing); |
|
|
RUN_TEST(test_htmlParseStartTag_incomplete_tag_discarded); |
|
|
RUN_TEST(test_htmlParseStartTag_many_attributes); |
|
|
|
|
|
return UNITY_END(); |
|
|
} |