#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <stdlib.h>
#include <string.h>
/*
 * Test-only entry point for the parser routine exercised by this file.
 * The routine itself is static inside its module, so the module provides this
 * externally linked wrapper; the definition lives outside this file.
 * NOTE(review): presumably a thin pass-through to the static function -
 * confirm against the module that defines it.
 */
extern void test_htmlParseContent(htmlParserCtxtPtr ctxt);
/* Number of end-tag names a single capture can remember. */
enum { SAX_CAPTURE_MAX_END_NAMES = 32 };

/*
 * Record of the SAX events seen during one parse, filled in by the callbacks
 * in this file and inspected by the tests afterwards. Every pointer member is
 * heap-allocated and owned by the capture.
 */
typedef struct {
    /* characters(): all reported text, concatenated and NUL-terminated. */
    char *text;
    size_t text_len;            /* bytes stored in text, terminator excluded */

    /* comment(): copy of the most recent comment and how many were seen. */
    char *last_comment;
    int comment_count;

    /* internalSubset(): copy of the most recent name and the call count. */
    char *internal_subset_name;
    int internal_subset_count;

    /* endElement(): element names in the order they were closed. */
    char *end_names[SAX_CAPTURE_MAX_END_NAMES];
    int end_count;              /* number of used slots in end_names */
} SaxCapture;
/*
 * Put a capture into its empty state: every pointer member NULL and every
 * counter zero. Must be called before the capture is handed to a parser.
 */
static void cap_init(SaxCapture *cap)
{
    *cap = (SaxCapture){0};
}
/*
 * Release every heap buffer owned by a capture.
 *
 * Each freed pointer is reset to NULL and end_count is reset to 0, so calling
 * cap_free() twice on the same capture (or reusing the capture afterwards)
 * cannot double-free or read a dangling pointer. The event counters
 * comment_count and internal_subset_count are left untouched.
 *
 * cap: capture to release; NULL is accepted and ignored.
 */
static void cap_free(SaxCapture *cap)
{
    if (cap == NULL) {
        return;
    }

    free(cap->text);
    cap->text = NULL;
    cap->text_len = 0;

    free(cap->last_comment);
    cap->last_comment = NULL;

    free(cap->internal_subset_name);
    cap->internal_subset_name = NULL;

    for (int i = 0; i < cap->end_count; i++) {
        free(cap->end_names[i]);
        cap->end_names[i] = NULL;
    }
    /* end_count bounds the loop above; zero it so a repeat call frees nothing. */
    cap->end_count = 0;
}
/*
 * Append len bytes from data to the capture's text buffer, keeping the buffer
 * NUL-terminated.
 *
 * cap:  capture whose text buffer grows; ignored if NULL.
 * data: bytes to append (need not be NUL-terminated); ignored if NULL.
 * len:  number of bytes to copy; 0 is a no-op.
 *
 * Best effort: if the request is invalid or memory cannot be obtained, the
 * capture is left exactly as it was and the tests comparing the text fail.
 */
static void cap_append_text(SaxCapture *cap, const char *data, size_t len)
{
    if (cap == NULL || data == NULL || len == 0) {
        return;
    }

    /*
     * text_len + len + 1 must not wrap around; otherwise realloc() would
     * return a short buffer and the memcpy() below would overrun it.
     */
    if (len >= (size_t)-1 - cap->text_len) {
        return;
    }

    char *grown = realloc(cap->text, cap->text_len + len + 1);
    if (grown == NULL) {
        return; /* the old buffer is still valid and still owned by cap */
    }

    memcpy(grown + cap->text_len, data, len);
    cap->text_len += len;
    grown[cap->text_len] = '\0';
    cap->text = grown;
}
/* SAX callbacks */
/*
 * SAX characters() callback: appends the reported bytes to the capture text.
 *
 * userData: the SaxCapture registered on the parser context.
 * ch:       character data; not assumed to be NUL-terminated, len bounds it.
 * len:      number of bytes at ch.
 *
 * Calls with a NULL capture, NULL data or a non-positive length are ignored.
 * Without the length check a negative len would turn into an enormous size_t
 * when cast for cap_append_text().
 */
static void onCharacters(void *userData, const xmlChar *ch, int len)
{
    SaxCapture *cap = (SaxCapture *)userData;

    if (cap == NULL || ch == NULL || len <= 0) {
        return;
    }
    cap_append_text(cap, (const char *)ch, (size_t)len);
}
static void onEndElement(void *userData, const xmlChar *name) {
SaxCapture *cap = (SaxCapture *)userData;
if (cap->end_count < (int)(sizeof(cap->end_names)/sizeof(cap->end_names[0]))) {
const char *src = (const char *)name;
size_t l = strlen(src);
cap->end_names[cap->end_count] = (char *)malloc(l + 1);
if (cap->end_names[cap->end_count]) {
memcpy(cap->end_names[cap->end_count], src, l + 1);
cap->end_count++;
}
}
}
static void onComment(void *userData, const xmlChar *value) {
SaxCapture *cap = (SaxCapture *)userData;
free(cap->last_comment);
cap->last_comment = strdup((const char *)value);
cap->comment_count++;
}
/*
 * SAX internalSubset() callback: counts the DOCTYPE declaration and keeps a
 * heap copy of its name, replacing the copy kept for any earlier declaration.
 *
 * userData:   the SaxCapture registered on the parser context.
 * name:       DOCTYPE name. A NULL name is tolerated (stored as NULL) instead
 *             of being passed to strdup(), which would be undefined behaviour;
 *             the parser can report a declaration whose name it failed to read.
 * ExternalID: unused.
 * SystemID:   unused.
 */
static void onInternalSubset(void *userData, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID)
{
    (void)ExternalID;
    (void)SystemID;

    SaxCapture *cap = (SaxCapture *)userData;
    if (cap == NULL) {
        return;
    }

    free(cap->internal_subset_name);
    cap->internal_subset_name = (name != NULL) ? strdup((const char *)name) : NULL;
    cap->internal_subset_count++;
}
/* Shared SAX handler */
static htmlSAXHandler make_sax(void) {
htmlSAXHandler sax;
memset(&sax, 0, sizeof(sax));
sax.characters = onCharacters;
sax.endElement = onEndElement;
sax.comment = onComment;
sax.internalSubset = onInternalSubset;
return sax;
}
/*
 * Create an in-memory HTML parser context over data whose SAX events are
 * routed to cap.
 *
 * data: NUL-terminated input; the context reads it in place, so it must
 *       outlive the context.
 * cap:  capture passed to every callback as userData.
 *
 * Returns the new context, or NULL if data is NULL or the context could not
 * be set up. The caller releases it with htmlFreeParserCtxt().
 *
 * The capture callbacks are copied into the handler the context already owns
 * rather than pointing ctxt->sax at a local or static object:
 * htmlFreeParserCtxt() frees ctxt->sax, so a replaced pointer would be an
 * invalid free and would leak the handler allocated with the context.
 */
static htmlParserCtxtPtr make_ctxt_with_sax(const char *data, SaxCapture *cap)
{
    if (data == NULL) {
        return NULL;
    }

    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(data, (int)strlen(data));
    if (ctxt == NULL) {
        return NULL;
    }

    if (ctxt->sax == NULL) {
        /* No context-owned handler to overwrite; report failure to the test. */
        htmlFreeParserCtxt(ctxt);
        return NULL;
    }

    *ctxt->sax = make_sax();
    ctxt->userData = cap;
    ctxt->disableSAX = 0;
    return ctxt;
}
void setUp(void)
{
    /* Intentionally empty: each test builds its own capture and context. */
}
void tearDown(void)
{
    /* Intentionally empty: each test releases what it created. */
}
/* Tests */
/* Input without any markup is delivered unchanged through characters(). */
void test_htmlParseContent_plain_text(void)
{
    SaxCapture captured;
    htmlParserCtxtPtr parser;

    cap_init(&captured);
    parser = make_ctxt_with_sax("Hello", &captured);
    TEST_ASSERT_NOT_NULL(parser);

    test_htmlParseContent(parser);

    TEST_ASSERT_NOT_NULL(captured.text);
    TEST_ASSERT_EQUAL_STRING("Hello", captured.text);
    TEST_ASSERT_EQUAL_INT(0, captured.comment_count);

    htmlFreeParserCtxt(parser);
    cap_free(&captured);
}
void test_htmlParseContent_element_with_end_tag(void) {
const char *html = "Hi
";
SaxCapture cap; cap_init(&cap);
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
TEST_ASSERT_NOT_NULL(ctxt);
test_htmlParseContent(ctxt);
TEST_ASSERT_NOT_NULL(cap.text);
TEST_ASSERT_EQUAL_STRING("Hi", cap.text);
TEST_ASSERT_TRUE(cap.end_count >= 1);
/* The first explicit end should be
*/
TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]);
htmlFreeParserCtxt(ctxt);
cap_free(&cap);
}
void test_htmlParseContent_comment_and_bogus_comment(void) {
const char *html1 = "XY";
SaxCapture cap1; cap_init(&cap1);
htmlParserCtxtPtr ctxt1 = make_ctxt_with_sax(html1, &cap1);
TEST_ASSERT_NOT_NULL(ctxt1);
test_htmlParseContent(ctxt1);
TEST_ASSERT_EQUAL_INT(1, cap1.comment_count);
TEST_ASSERT_EQUAL_STRING("abc", cap1.last_comment);
TEST_ASSERT_EQUAL_STRING("XY", cap1.text);
htmlFreeParserCtxt(ctxt1);
cap_free(&cap1);
const char *html2 = "AB"; /* bogus comment starting after Text";
SaxCapture cap; cap_init(&cap);
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
TEST_ASSERT_NOT_NULL(ctxt);
test_htmlParseContent(ctxt);
TEST_ASSERT_EQUAL_INT(1, cap.internal_subset_count);
TEST_ASSERT_NOT_NULL(cap.internal_subset_name);
TEST_ASSERT_EQUAL_STRING("html", cap.internal_subset_name);
TEST_ASSERT_EQUAL_STRING("Text", cap.text);
htmlFreeParserCtxt(ctxt);
cap_free(&cap);
}
/*
 * An element still open when the input ends is closed automatically, so an
 * endElement event is reported for it.
 *
 * The <p> start tag had been stripped from the input, leaving bare text that
 * no longer matched the "no closing tag" scenario described here; it is
 * restored so the test opens the element it expects to see closed.
 */
void test_htmlParseContent_autoclose_on_end(void)
{
    const char *html = "<p>ab"; /* no closing tag */
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_EQUAL_STRING("ab", cap.text);
    /* At end-of-input, auto-close should at least close <p>. */
    TEST_ASSERT_TRUE(cap.end_count >= 1);
    TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}
/*
 * Inside a script element a '<' is plain data: the script body is reported
 * verbatim through characters() and the element is closed by its end tag.
 *
 * The input literal had been emptied (the whole script element was stripped),
 * which contradicted the assertions below; it is restored to the script
 * element they describe (body "1<2", an endElement for "script").
 */
void test_htmlParseContent_script_mode_preserves_lt_inside_script(void)
{
    const char *html = "<script>1<2</script>";
    SaxCapture cap;
    cap_init(&cap);

    htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap);
    TEST_ASSERT_NOT_NULL(ctxt);

    test_htmlParseContent(ctxt);

    TEST_ASSERT_NOT_NULL(cap.text);
    TEST_ASSERT_EQUAL_STRING("1<2", cap.text);

    /* Ensure the script element was closed. */
    TEST_ASSERT_TRUE(cap.end_count >= 1);

    /*
     * One of the closed elements should be script; it might not be the first
     * if nested wrappers close after it.
     */
    int saw_script = 0;
    for (int i = 0; i < cap.end_count; i++) {
        if (strcmp(cap.end_names[i], "script") == 0) {
            saw_script = 1;
            break;
        }
    }
    TEST_ASSERT_TRUE(saw_script);

    htmlFreeParserCtxt(ctxt);
    cap_free(&cap);
}
/*
 * Unity runner: executes every test in this file in a fixed order and returns
 * Unity's result code as the process exit status.
 */
int main(void)
{
    int unity_result;

    UNITY_BEGIN();

    RUN_TEST(test_htmlParseContent_plain_text);
    RUN_TEST(test_htmlParseContent_element_with_end_tag);
    RUN_TEST(test_htmlParseContent_comment_and_bogus_comment);
    RUN_TEST(test_htmlParseContent_literal_lt_when_not_a_tag);
    RUN_TEST(test_htmlParseContent_doctype_in_content);
    RUN_TEST(test_htmlParseContent_autoclose_on_end);
    RUN_TEST(test_htmlParseContent_script_mode_preserves_lt_inside_script);

    unity_result = UNITY_END();
    return unity_result;
}