/*
 * Unit tests for htmlParseContent(), driven through a SAX handler that
 * records the events it receives.
 *
 * NOTE(review): the copy of this file that was reviewed had lost everything
 * that looked like markup: the header names in the #include lines, the tags
 * inside string literals, and the tags inside comments. Those pieces were
 * restored from the surrounding assertions; every restored spot carries its
 * own NOTE(review) and should be confirmed against the upstream test.
 */
#include "unity/unity.h"
/* NOTE(review): the three header names below were reconstructed from the
 * identifiers this file uses (html* types, malloc/free, memcpy/strlen). */
#include <libxml/HTMLparser.h>
#include <stdlib.h>
#include <string.h>

/* Wrapper for static function provided in module */
extern void test_htmlParseContent(htmlParserCtxtPtr ctxt);

/* Upper bound on the number of end-element names a capture remembers. */
enum { MAX_END_NAMES = 32 };

/* Simple capture structure for SAX events */
typedef struct {
    char *text;                       /* concatenation of all characters() data */
    size_t text_len;                  /* length of text, excluding the NUL */
    char *last_comment;               /* value of the most recent comment() */
    int comment_count;                /* number of comment() callbacks seen */
    char *internal_subset_name;       /* name from the most recent internalSubset() */
    int internal_subset_count;        /* number of internalSubset() callbacks seen */
    char *end_names[MAX_END_NAMES];   /* names passed to endElement(), in order */
    int end_count;                    /* number of valid entries in end_names */
} SaxCapture;

/* Returns a heap copy of src, or NULL when src is NULL or allocation fails. */
static char *dup_cstr(const char *src)
{
    if (src == NULL) {
        return NULL;
    }
    size_t size = strlen(src) + 1;
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, src, size);
    }
    return copy;
}

/* Puts a capture into its empty state. */
static void cap_init(SaxCapture *cap)
{
    memset(cap, 0, sizeof(*cap));
}

/* Releases everything a capture owns and leaves it empty, so a second call
 * is harmless. */
static void cap_free(SaxCapture *cap)
{
    free(cap->text);
    free(cap->last_comment);
    free(cap->internal_subset_name);
    for (int i = 0; i < cap->end_count; i++) {
        free(cap->end_names[i]);
    }
    memset(cap, 0, sizeof(*cap));
}

/* Appends len bytes of data to cap->text and keeps it NUL-terminated. */
static void cap_append_text(SaxCapture *cap, const char *data, size_t len)
{
    if (data == NULL || len == 0) {
        return;
    }
    char *grown = realloc(cap->text, cap->text_len + len + 1);
    if (grown == NULL) {
        return; /* best effort; the text assertions fail if data is missing */
    }
    cap->text = grown;
    memcpy(cap->text + cap->text_len, data, len);
    cap->text_len += len;
    cap->text[cap->text_len] = '\0';
}

/* SAX callbacks */

/* characters(): accumulate the text run. */
static void onCharacters(void *userData, const xmlChar *ch, int len)
{
    SaxCapture *cap = (SaxCapture *)userData;
    if (len <= 0) {
        return; /* a negative length must not be converted to a huge size_t */
    }
    cap_append_text(cap, (const char *)ch, (size_t)len);
}

/* endElement(): remember the element name while there is room. */
static void onEndElement(void *userData, const xmlChar *name)
{
    SaxCapture *cap = (SaxCapture *)userData;
    if (cap->end_count >= MAX_END_NAMES) {
        return;
    }
    char *copy = dup_cstr((const char *)name);
    if (copy != NULL) {
        /* Only count names that were actually stored. */
        cap->end_names[cap->end_count++] = copy;
    }
}

/* comment(): keep the latest comment value and count the event. */
static void onComment(void *userData, const xmlChar *value)
{
    SaxCapture *cap = (SaxCapture *)userData;
    free(cap->last_comment);
    cap->last_comment = dup_cstr((const char *)value);
    cap->comment_count++;
}

/* internalSubset(): keep the latest subset name and count the event. */
static void onInternalSubset(void *userData, const xmlChar *name,
                             const xmlChar *ExternalID, const xmlChar *SystemID)
{
    (void)ExternalID;
    (void)SystemID;
    SaxCapture *cap = (SaxCapture *)userData;
    free(cap->internal_subset_name);
    cap->internal_subset_name = dup_cstr((const char *)name);
    cap->internal_subset_count++;
}

/* Shared SAX handler: all callbacks NULL except the four recorded above. */
static htmlSAXHandler make_sax(void)
{
    htmlSAXHandler sax;
    memset(&sax, 0, sizeof(sax));
    sax.characters = onCharacters;
    sax.endElement = onEndElement;
    sax.comment = onComment;
    sax.internalSubset = onInternalSubset;
    return sax;
}

/*
 * Helper to create a parser context with our SAX.
 *
 * The callbacks are copied into the handler the context already owns rather
 * than pointing ctxt->sax at storage owned by this file: htmlFreeParserCtxt()
 * releases ctxt->sax, so installing a static object there would hand a
 * non-heap pointer to the deallocator and leak the context's own handler.
 *
 * Returns NULL when the context cannot be created or has no handler.
 */
static htmlParserCtxtPtr make_ctxt_with_sax(const char *data, SaxCapture *cap)
{
    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(data, (int)strlen(data));
    if (ctxt == NULL) {
        return NULL;
    }
    if (ctxt->sax == NULL) {
        htmlFreeParserCtxt(ctxt);
        return NULL;
    }
    *ctxt->sax = make_sax();
    ctxt->userData = cap;
    ctxt->disableSAX = 0;
    return ctxt;
}

void setUp(void)
{
    /* Setup code here, or leave empty */
}

void tearDown(void)
{
    /* Cleanup code here, or leave empty */
}

/* Tests */

/* Plain text is reported through characters() and produces no comments. */
void test_htmlParseContent_plain_text(void)
{
    const char *html = "Hello";
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_NOT_NULL(cap.text);
    TEST_ASSERT_EQUAL_STRING("Hello", cap.text);
    TEST_ASSERT_EQUAL_INT(0, cap.comment_count);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}

/* An element with an explicit end tag reports its text and its end event. */
void test_htmlParseContent_element_with_end_tag(void)
{
    /* NOTE(review): tags restored; the assertions expect text "Hi" and a
     * first end event named "p". */
    const char *html = "<p>Hi</p>";
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_NOT_NULL(cap.text);
    TEST_ASSERT_EQUAL_STRING("Hi", cap.text);
    TEST_ASSERT_TRUE(cap.end_count >= 1);
    /* The first explicit end should be the p element */
    TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}

/* Comments are reported through comment() and do not split the text. */
void test_htmlParseContent_comment_and_bogus_comment(void)
{
    /* NOTE(review): comment markup restored; the assertions expect one
     * comment with value "abc" between the text "X" and "Y". */
    const char *html1 = "X<!--abc-->Y";
    SaxCapture cap1;
    cap_init(&cap1);

    htmlParserCtxtPtr ctxt1 = make_ctxt_with_sax(html1, &cap1);
    TEST_ASSERT_NOT_NULL(ctxt1);

    test_htmlParseContent(ctxt1);

    TEST_ASSERT_EQUAL_INT(1, cap1.comment_count);
    TEST_ASSERT_EQUAL_STRING("abc", cap1.last_comment);
    TEST_ASSERT_EQUAL_STRING("XY", cap1.text);

    htmlFreeParserCtxt(ctxt1);
    cap_free(&cap1);

    /* NOTE(review): only the surrounding text "A" and "B" and the words
     * "bogus comment" survived for this half. The markup between them and
     * the original assertions are a reconstruction - confirm upstream. */
    const char *html2 = "A<!bogus>B";
    SaxCapture cap2;
    cap_init(&cap2);

    htmlParserCtxtPtr ctxt2 = make_ctxt_with_sax(html2, &cap2);
    TEST_ASSERT_NOT_NULL(ctxt2);

    test_htmlParseContent(ctxt2);

    TEST_ASSERT_EQUAL_INT(1, cap2.comment_count);
    TEST_ASSERT_EQUAL_STRING("AB", cap2.text);

    htmlFreeParserCtxt(ctxt2);
    cap_free(&cap2);
}

/*
 * A '<' that does not start a tag should reach the text.
 *
 * NOTE(review): the body of this test did not survive in the reviewed copy;
 * only its name (referenced from main) did. The input and the deliberately
 * weak assertions below are a reconstruction - replace with the upstream
 * version when available.
 */
void test_htmlParseContent_literal_lt_when_not_a_tag(void)
{
    const char *html = "a < b";
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_NOT_NULL(cap.text);
    TEST_ASSERT_NOT_NULL(strchr(cap.text, '<'));

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}

/* A DOCTYPE met in content is reported through internalSubset(). */
void test_htmlParseContent_doctype_in_content(void)
{
    /* NOTE(review): declaration restored; the assertions expect one internal
     * subset named "html" followed by the text "Text". */
    const char *html = "<!DOCTYPE html>Text";
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_EQUAL_INT(1, cap.internal_subset_count);
    TEST_ASSERT_NOT_NULL(cap.internal_subset_name);
    TEST_ASSERT_EQUAL_STRING("html", cap.internal_subset_name);
    TEST_ASSERT_EQUAL_STRING("Text", cap.text);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}

/* An element left open at end of input still produces an end event. */
void test_htmlParseContent_autoclose_on_end(void)
{
    /* NOTE(review): start tag restored; the assertions expect text "ab" and
     * a first end event named "p". */
    const char *html = "<p>ab"; /* no closing tag */
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_EQUAL_STRING("ab", cap.text);
    /* At end-of-input, auto-close should at least close the p element */
    TEST_ASSERT_TRUE(cap.end_count >= 1);
    TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}

/* Inside a script element a '<' is delivered as text, not parsed as a tag. */
void test_htmlParseContent_script_mode_preserves_lt_inside_script(void)
{
    /* NOTE(review): script tags restored; the assertions expect text "1<2"
     * and an end event named "script". */
    const char *html = "<script>1<2</script>";
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_NOT_NULL(cap.text);
    TEST_ASSERT_EQUAL_STRING("1<2", cap.text);

    /* Ensure script element was closed */
    TEST_ASSERT_TRUE(cap.end_count >= 1);
    /* one of the closed elements should be script; it might not be the first
     * if nested wrappers close after */
    int saw_script = 0;
    for (int i = 0; i < cap.end_count; i++) {
        if (strcmp(cap.end_names[i], "script") == 0) {
            saw_script = 1;
            break;
        }
    }
    TEST_ASSERT_TRUE(saw_script);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}

int main(void)
{
    UNITY_BEGIN();
    RUN_TEST(test_htmlParseContent_plain_text);
    RUN_TEST(test_htmlParseContent_element_with_end_tag);
    RUN_TEST(test_htmlParseContent_comment_and_bogus_comment);
    RUN_TEST(test_htmlParseContent_literal_lt_when_not_a_tag);
    RUN_TEST(test_htmlParseContent_doctype_in_content);
    RUN_TEST(test_htmlParseContent_autoclose_on_end);
    RUN_TEST(test_htmlParseContent_script_mode_preserves_lt_inside_script);
    return UNITY_END();
}