/*
 * Unity tests that drive the HTML start-tag parser through the
 * test_htmlParseStartTag() wrapper declared below. Each test installs a
 * startElement callback that records the element name and its attributes,
 * calls the wrapper once, and then checks what was recorded.
 *
 * NOTE(review): this copy of the file appears to have passed through a step
 * that removed angle-bracket markup. The four #include directives below have
 * no header name, every `src` string literal is an opening quote, a line
 * break and a closing quote, and the text between the start of
 * test_htmlParseStartTag_incomplete_tag_discarded() and the tail of the
 * many-attributes test is missing. Restore those parts from the original
 * file before building; nothing here has been guessed back in.
 */
#include "unity/unity.h"
#include
#include
#include
#include

/* Wrapper provided in the module for calling the static function */
extern void test_htmlParseStartTag(htmlParserCtxtPtr ctxt);

/* Simple capture of SAX startElement events */

/* One recorded startElement call. Both name and the strings in atts are
 * copies made by test_sax_startElement() and released by capture_free(). */
typedef struct {
    xmlChar *name;
    int att_count; /* number of attribute pairs */
    xmlChar **atts; /* flattened name, value, name, value..., NULL terminated */
} StartEvent;

/* Fixed-capacity log of recorded events; nevents is the number of slots used. */
typedef struct {
    int nevents;
    StartEvent events[64];
} SAXCapture;

/* Put a capture into the empty state by zeroing every field. */
static void capture_init(SAXCapture *cap) {
    memset(cap, 0, sizeof(*cap));
}

/*
 * Release everything recorded in cap and zero the structure again.
 * Names and attribute strings are released with xmlFree(); the atts array
 * itself comes from calloc() in test_sax_startElement(), so it is released
 * with free().
 */
static void capture_free(SAXCapture *cap) {
    for (int i = 0; i < cap->nevents; i++) {
        if (cap->events[i].name) {
            xmlFree(cap->events[i].name);
        }
        if (cap->events[i].atts) {
            /* Free copied attribute names and values */
            int j = 0;
            /* Walks until the first NULL slot, i.e. the array terminator. */
            while (cap->events[i].atts[j] != NULL) {
                xmlFree(cap->events[i].atts[j]);
                j++;
            }
            free(cap->events[i].atts);
        }
    }
    memset(cap, 0, sizeof(*cap));
}

/*
 * startElement callback: append a copy of the element name and of its
 * attributes to the SAXCapture passed as ctx.
 *
 * ctx  - the SAXCapture to record into
 * name - element name; duplicated with xmlStrdup()
 * atts - name/value pointer pairs ended by a NULL pointer, or NULL
 *
 * A call that arrives when every event slot is already in use is dropped
 * without any indication to the caller.
 */
static void test_sax_startElement(void *ctx, const xmlChar *name, const xmlChar **atts) {
    SAXCapture *cap = (SAXCapture *)ctx;
    if (cap->nevents >= (int)(sizeof(cap->events)/sizeof(cap->events[0]))) return;
    StartEvent *ev = &cap->events[cap->nevents++];
    ev->name = xmlStrdup(name);

    /* First pass: count the complete name/value pairs. */
    int count = 0;
    if (atts != NULL) {
        const xmlChar **p = atts;
        while (*p != NULL) {
            /* name */
            p++;
            /* value */
            /* NOTE(review): a NULL value stops the count here, so an
             * attribute reported without a value, and every attribute after
             * it, is left out of the capture. Confirm whether the parser can
             * pass NULL values; if it can, count by name slots instead. */
            if (*p == NULL) break;
            p++;
            count++;
        }
    }
    ev->att_count = count;

    /* Second pass: copy the counted pairs into a NULL-terminated array. */
    if (atts != NULL && count > 0) {
        /* allocate space for 2*count + 1 NULL terminator */
        /* NOTE(review): the calloc() result is dereferenced without a NULL
         * check, here and in the else branch. */
        ev->atts = (xmlChar **)calloc((size_t)(2 * count + 1), sizeof(xmlChar *));
        int idx = 0;
        for (int i = 0; i < count; i++) {
            const xmlChar *aname = atts[2*i];
            const xmlChar *aval = atts[2*i + 1];
            ev->atts[idx++] = xmlStrdup(aname);
            /* The counting loop above only counts pairs whose value is
             * non-NULL, so the NULL arm below is not reached in practice. */
            ev->atts[idx++] = (aval != NULL) ? xmlStrdup(aval) : NULL;
        }
        ev->atts[idx] = NULL;
    } else {
        /* No attributes counted: store an array holding only the terminator. */
        ev->atts = (xmlChar **)calloc(1, sizeof(xmlChar *));
        ev->atts[0] = NULL;
    }
}

/*
 * Look up an attribute on a recorded event.
 *
 * ev   - recorded event to search
 * name - attribute name; compared with xmlStrcasecmp(), libxml2's
 *        case-insensitive string compare
 *
 * Returns the stored value, which still belongs to the capture, or NULL when
 * ev has no attribute array or no name matches.
 */
static const xmlChar* find_attr_value(const StartEvent *ev, const char *name) {
    if (ev->atts == NULL) return NULL;
    for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) {
        if (xmlStrcasecmp(ev->atts[i], (const xmlChar *)name) == 0) {
            return ev->atts[i+1];
        }
    }
    return NULL;
}

/* Create a parser context from a memory buffer and attach our SAX handler */
/*
 * buf   - NUL-terminated input; its strlen() is passed as the buffer size
 * flags - OR-ed into ctxt->options
 * cap   - stored as ctxt->userData, which is what the callback receives as ctx
 *
 * Returns the new context; the tests release it with htmlFreeParserCtxt().
 *
 * NOTE(review): ctxt->sax is overwritten with the address of a function-local
 * static. Whatever handler the context held before is no longer referenced
 * from it, and if htmlFreeParserCtxt() releases ctxt->sax (libxml2 is
 * believed to xmlFree() that pointer - confirm) every test ends by freeing a
 * non-heap object. Filling in the handler the context already owns would
 * avoid both problems. The one static is also shared by every context this
 * helper creates.
 */
static htmlParserCtxtPtr make_ctxt(const char *buf, int flags, SAXCapture *cap) {
    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(buf, (int)strlen(buf));
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context");
    static xmlSAXHandler sax; /* static to ensure it lives long enough */
    /* Every callback except startElement is left NULL. */
    memset(&sax, 0, sizeof(sax));
    sax.startElement = test_sax_startElement;
    ctxt->sax = &sax;
    ctxt->userData = cap;
    ctxt->options |= flags;
    return ctxt;
}

/* Test fixture hook (setUp is the name Unity conventionally runs before each test). */
void setUp(void) {
    /* Initialize libxml2 for safety */
    xmlInitParser();
}

/* Test fixture hook (tearDown is the name Unity conventionally runs after each test). */
void tearDown(void) {
    /* No global cleanup here to avoid interfering between tests */
}

/* Test: simple start tag without implied element insertion (NOIMPLIED) */
/* Expects exactly one event, named "div", with no attributes. */
void test_htmlParseStartTag_simple_div_noimplied(void) {
    /* NOTE(review): the literal below has lost its contents; presumably it
     * held the start-tag markup under test - restore from the original. */
    const char *src = "
";
    SAXCapture cap;
    capture_init(&cap);
    htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap);
    test_htmlParseStartTag(ctxt);
    TEST_ASSERT_EQUAL_INT(1, cap.nevents);
    TEST_ASSERT_NOT_NULL(cap.events[0].name);
    TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name);
    TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count);
    capture_free(&cap);
    htmlFreeParserCtxt(ctxt);
}

/* Test: uppercase tag and attribute names are lowercased; values preserved */
/* Expects one "div" event with at least two attributes, where "class" has
 * the value "AbC" and "id" has the value "42". */
void test_htmlParseStartTag_uppercase_and_attr_lowercased(void) {
    /* NOTE(review): the literal below has lost its contents - restore from
     * the original. */
    const char *src = "
";
    SAXCapture cap;
    capture_init(&cap);
    htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap);
    test_htmlParseStartTag(ctxt);
    TEST_ASSERT_EQUAL_INT(1, cap.nevents);
    TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name);
    TEST_ASSERT_TRUE(cap.events[0].att_count >= 2);
    /* Attribute names should be lowercased */
    /* NOTE(review): find_attr_value() compares names case-insensitively, so
     * these lookups succeed whether or not the names were lowercased. Only
     * the element name check above is case-sensitive. */
    const xmlChar *vclass = find_attr_value(&cap.events[0], "class");
    const xmlChar *vid = find_attr_value(&cap.events[0], "id");
    TEST_ASSERT_NOT_NULL(vclass);
    TEST_ASSERT_NOT_NULL(vid);
    TEST_ASSERT_EQUAL_STRING("AbC", (const char *)vclass);
    TEST_ASSERT_EQUAL_STRING("42", (const char *)vid);
    capture_free(&cap);
    htmlFreeParserCtxt(ctxt);
}

/* Test: duplicate attributes are de-duplicated with first value preserved */
void test_htmlParseStartTag_duplicate_attributes_dedup(void) {
    /* NOTE(review): the literal below has lost its contents - restore from
     * the original. */
    const char *src = "
";
    SAXCapture cap;
    capture_init(&cap);
    htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap);
    test_htmlParseStartTag(ctxt);
    TEST_ASSERT_EQUAL_INT(1, cap.nevents);
    /* Only one 'class' attribute should remain, with the first value 'a' */
    const StartEvent *ev = &cap.events[0];
    /* Count how many 'class' attributes remain */
    int class_count = 0;
    for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) {
        if (xmlStrcasecmp(ev->atts[i], BAD_CAST "class") == 0) class_count++;
    }
    TEST_ASSERT_EQUAL_INT(1, class_count);
    /* find_attr_value() returns the first match, so this checks which value
     * was kept. */
    const xmlChar *v = find_attr_value(ev, "class");
    TEST_ASSERT_NOT_NULL(v);
    TEST_ASSERT_EQUAL_STRING("a", (const char *)v);
    capture_free(&cap);
    htmlFreeParserCtxt(ctxt);
}

/* Test: unexpected solidus inside tag (not as '/>') is ignored */
/* Expects one "div" event that still carries an "id" attribute equal to "x". */
void test_htmlParseStartTag_unexpected_solidus_ignored(void) {
    /* NOTE(review): the literal below has lost its contents - restore from
     * the original. */
    const char *src = "
";
    SAXCapture cap;
    capture_init(&cap);
    htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap);
    test_htmlParseStartTag(ctxt);
    TEST_ASSERT_EQUAL_INT(1, cap.nevents);
    TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name);
    const xmlChar *vx = find_attr_value(&cap.events[0], "id");
    TEST_ASSERT_NOT_NULL(vx);
    TEST_ASSERT_EQUAL_STRING("x", (const char *)vx);
    capture_free(&cap);
    htmlFreeParserCtxt(ctxt);
}

/* Test: self-closing tags '/>' are handled and produce a startElement event */
/* Expects one "br" event with no attributes. */
void test_htmlParseStartTag_self_closing(void) {
    /* NOTE(review): the literal below has lost its contents - restore from
     * the original. */
    const char *src = "
";
    SAXCapture cap;
    capture_init(&cap);
    htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap);
    test_htmlParseStartTag(ctxt);
    TEST_ASSERT_EQUAL_INT(1, cap.nevents);
    TEST_ASSERT_EQUAL_STRING("br", (const char *)cap.events[0].name);
    TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count);
    capture_free(&cap);
    htmlFreeParserCtxt(ctxt);
}

/* Test: incomplete tag without closing '>' is discarded (no SAX events) */
void test_htmlParseStartTag_incomplete_tag_discarded(void) {
    /* NOTE(review): text is missing from this point. The literal below is
     * followed by a stray closing parenthesis, and the statements after it
     * use `buf` and `N`, which are not declared anywhere in the visible
     * text, and expect one "span" event - the opposite of what the comment
     * above this function describes. This looks like the start of this test
     * fused with the tail of the many-attributes test that main() registers
     * but which has no visible definition. Restore both functions from the
     * original file. */
    const char *src = "
");
    SAXCapture cap;
    capture_init(&cap);
    htmlParserCtxtPtr ctxt = make_ctxt(buf, HTML_PARSE_NOIMPLIED, &cap);
    test_htmlParseStartTag(ctxt);
    TEST_ASSERT_EQUAL_INT(1, cap.nevents);
    TEST_ASSERT_EQUAL_STRING("span", (const char *)cap.events[0].name);
    TEST_ASSERT_EQUAL_INT(N, cap.events[0].att_count);
    /* Spot-check a few attributes */
    const StartEvent *ev = &cap.events[0];
    const xmlChar *v0 = find_attr_value(ev, "a0");
    const xmlChar *v7 = find_attr_value(ev, "a7");
    const xmlChar *v19 = find_attr_value(ev, "a19");
    TEST_ASSERT_NOT_NULL(v0);
    TEST_ASSERT_NOT_NULL(v7);
    TEST_ASSERT_NOT_NULL(v19);
    TEST_ASSERT_EQUAL_STRING("v0", (const char *)v0);
    TEST_ASSERT_EQUAL_STRING("v7", (const char *)v7);
    TEST_ASSERT_EQUAL_STRING("v19", (const char *)v19);
    capture_free(&cap);
    htmlFreeParserCtxt(ctxt);
}

/* Test runner entry point: runs each registered test in order and returns
 * the value produced by UNITY_END(). */
int main(void) {
    UNITY_BEGIN();
    RUN_TEST(test_htmlParseStartTag_simple_div_noimplied);
    RUN_TEST(test_htmlParseStartTag_uppercase_and_attr_lowercased);
    RUN_TEST(test_htmlParseStartTag_duplicate_attributes_dedup);
    RUN_TEST(test_htmlParseStartTag_unexpected_solidus_ignored);
    RUN_TEST(test_htmlParseStartTag_self_closing);
    RUN_TEST(test_htmlParseStartTag_incomplete_tag_discarded);
    RUN_TEST(test_htmlParseStartTag_many_attributes);
    return UNITY_END();
}