#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/encoding.h>
#include <libxml/parser.h>
#include <libxml/xmlstring.h>
#include <string.h>
/* Capacity in bytes (terminator included) of each captured-text buffer. */
enum { TEST_SAX_TEXT_CAPACITY = 256 };

/*
 * Bookkeeping shared with the SAX callbacks through their ctx pointer.
 * Every counter is bumped by the callback of the matching name; the two
 * text buffers keep the most recently reported comment text and
 * internal-subset name as NUL-terminated strings.
 */
typedef struct {
    int locatorCount;        /* setDocumentLocator callbacks seen */
    int startCount;          /* startDocument callbacks seen */
    int endCount;            /* endDocument callbacks seen */
    int commentCount;        /* comment callbacks seen */
    int internalSubsetCount; /* internalSubset callbacks seen */
    char lastComment[TEST_SAX_TEXT_CAPACITY];        /* text of the latest comment */
    char internalSubsetName[TEST_SAX_TEXT_CAPACITY]; /* name from the latest internalSubset */
} TestSAXState;
/* SAX callbacks */
/*
 * setDocumentLocator handler: records one more call in the TestSAXState
 * passed as ctx. The locator itself is not inspected, and a NULL ctx is
 * silently ignored.
 */
static void onSetDocumentLocator(void *ctx, const xmlSAXLocator *loc) {
    TestSAXState *state = ctx;

    (void)loc;
    if (state == NULL) {
        return;
    }
    state->locatorCount += 1;
}
/*
 * startDocument handler: records one more call in the TestSAXState passed
 * as ctx; a NULL ctx is silently ignored.
 */
static void onStartDocument(void *ctx) {
    TestSAXState *state = ctx;

    if (state == NULL) {
        return;
    }
    state->startCount += 1;
}
/*
 * endDocument handler: records one more call in the TestSAXState passed
 * as ctx; a NULL ctx is silently ignored.
 */
static void onEndDocument(void *ctx) {
    TestSAXState *state = ctx;

    if (state == NULL) {
        return;
    }
    state->endCount += 1;
}
/*
 * comment handler: counts the call and remembers the comment text.
 *
 * ctx   - TestSAXState to update; NULL makes the call a no-op.
 * value - comment text; NULL stores an empty string. Text longer than the
 *         buffer is truncated, and the stored copy is always NUL-terminated.
 */
static void onComment(void *ctx, const xmlChar *value) {
    TestSAXState *state = ctx;
    const size_t last = sizeof(state->lastComment) - 1;

    if (state == NULL) {
        return;
    }
    state->commentCount += 1;

    if (value == NULL) {
        state->lastComment[0] = '\0';
        return;
    }

    /* strncpy() does not terminate on truncation, so the last byte is forced. */
    strncpy(state->lastComment, (const char *)value, last);
    state->lastComment[last] = '\0';
}
/*
 * internalSubset handler: counts the call and remembers the reported name.
 *
 * ctx        - TestSAXState to update; NULL makes the call a no-op.
 * name       - name passed by the parser; NULL stores an empty string. Longer
 *              names are truncated, and the copy is always NUL-terminated.
 * ExternalID - ignored.
 * SystemID   - ignored.
 */
static void onInternalSubset(void *ctx,
                             const xmlChar *name,
                             const xmlChar *ExternalID,
                             const xmlChar *SystemID) {
    TestSAXState *state = ctx;
    const size_t last = sizeof(state->internalSubsetName) - 1;

    (void)ExternalID;
    (void)SystemID;

    if (state == NULL) {
        return;
    }
    state->internalSubsetCount += 1;

    if (name == NULL) {
        state->internalSubsetName[0] = '\0';
        return;
    }

    /* strncpy() does not terminate on truncation, so the last byte is forced. */
    strncpy(state->internalSubsetName, (const char *)name, last);
    state->internalSubsetName[last] = '\0';
}
/*
 * Helper: create a push-parser context that reports to `sax`, hands
 * `userData` to the callbacks, and is primed with the text in `html`.
 *
 * sax      - SAX handler table passed through to htmlCreatePushParserCtxt().
 * userData - opaque pointer passed through as the callbacks' context.
 * html     - NUL-terminated initial input. NULL is accepted and creates the
 *            context with a zero-length initial chunk, so strlen() is never
 *            called on a null pointer.
 *
 * Returns the result of htmlCreatePushParserCtxt() unchanged. Callers in
 * this file check it for NULL and release it with htmlFreeParserCtxt().
 */
static htmlParserCtxtPtr makeCtxtWithHTML(xmlSAXHandler *sax, void *userData, const char *html) {
    const int len = (html != NULL) ? (int)strlen(html) : 0;

    /* Pass XML_CHAR_ENCODING_NONE to avoid pre-setting encoding flags */
    return htmlCreatePushParserCtxt(sax, userData, html, len, NULL,
                                    XML_CHAR_ENCODING_NONE);
}
/* Unity fixtures */
/* Unity fixture hook: makes sure the libxml2 parser is initialised. */
void setUp(void) {
    xmlInitParser();
}
/*
 * Unity fixture hook: releases libxml2's global parser state.
 *
 * NOTE(review): the libxml2 documentation recommends calling
 * xmlCleanupParser() only once the process has finished with the library.
 * Confirm that cleaning up here and re-initialising in setUp() is intended.
 */
void tearDown(void) {
    xmlCleanupParser();
}
/* Tests */
/* htmlParseDocument() must return -1 when handed a NULL parser context. */
void test_htmlParseDocument_null_ctxt_returns_minus1(void) {
    const int rc = htmlParseDocument(NULL);

    TEST_ASSERT_EQUAL_INT(-1, rc);
}
/*
 * htmlParseDocument() must return -1 for a context that has no input
 * stream attached.
 */
void test_htmlParseDocument_null_input_in_ctxt_returns_minus1(void) {
    htmlParserCtxtPtr parser = htmlNewParserCtxt();

    TEST_ASSERT_NOT_NULL(parser);
    /* Precondition of this test: no input has been attached yet. */
    TEST_ASSERT_NULL(parser->input);

    const int rc = htmlParseDocument(parser);
    TEST_ASSERT_EQUAL_INT(-1, rc);

    htmlFreeParserCtxt(parser);
}
void test_htmlParseDocument_calls_locator_start_end(void) {
const char *html = "Hello";
TestSAXState st = {0};
xmlSAXHandler sax;
memset(&sax, 0, sizeof(sax));
sax.setDocumentLocator = onSetDocumentLocator;
sax.startDocument = onStartDocument;
sax.endDocument = onEndDocument;
htmlParserCtxtPtr ctxt = makeCtxtWithHTML(&sax, &st, html);
TEST_ASSERT_NOT_NULL(ctxt);
int ret = htmlParseDocument(ctxt);
TEST_ASSERT_EQUAL_INT(0, ret);
TEST_ASSERT_EQUAL_INT(1, st.locatorCount);
TEST_ASSERT_EQUAL_INT(1, st.startCount);
TEST_ASSERT_EQUAL_INT(1, st.endCount);
htmlFreeParserCtxt(ctxt);
}
/*
 * A comment and a DOCTYPE placed ahead of the document content must be
 * reported through the comment and internalSubset callbacks, with exactly
 * one startDocument and one endDocument around the parse.
 */
void test_htmlParseDocument_parses_leading_comment_and_doctype(void) {
    /*
     * The input must contain what the assertions below look for: a comment
     * whose text is "A" and a DOCTYPE naming "html". With an empty string
     * neither callback could ever fire.
     */
    const char *html = "<!--A--><!DOCTYPE html><html><body>Hello</body></html>";
    TestSAXState st = {0};
    xmlSAXHandler sax;

    memset(&sax, 0, sizeof(sax));
    sax.setDocumentLocator = onSetDocumentLocator;
    sax.startDocument = onStartDocument;
    sax.endDocument = onEndDocument;
    sax.comment = onComment;
    sax.internalSubset = onInternalSubset;

    htmlParserCtxtPtr ctxt = makeCtxtWithHTML(&sax, &st, html);
    TEST_ASSERT_NOT_NULL(ctxt);

    int ret = htmlParseDocument(ctxt);
    TEST_ASSERT_EQUAL_INT(0, ret);

    TEST_ASSERT_EQUAL_INT(1, st.startCount);
    TEST_ASSERT_EQUAL_INT(1, st.endCount);
    TEST_ASSERT_EQUAL_INT(1, st.commentCount);
    TEST_ASSERT_EQUAL_STRING("A", st.lastComment);
    TEST_ASSERT_EQUAL_INT(1, st.internalSubsetCount);
    TEST_ASSERT_EQUAL_STRING("html", st.internalSubsetName);

    htmlFreeParserCtxt(ctxt);
}
/*
 * A bogus comment and a bogus processing instruction placed ahead of the
 * content are each expected to be reported through the comment callback.
 */
void test_htmlParseDocument_parses_bogus_comment_and_pi_as_comments(void) {
    /*
     * Bogus comment '<!x>' followed by bogus PI '<?pi test>' before content.
     * The input must actually contain both constructs; an empty string
     * could never produce the two comment callbacks asserted below.
     */
    const char *html = "<!x><?pi test><html><body>Hello</body></html>";
    TestSAXState st = {0};
    xmlSAXHandler sax;

    memset(&sax, 0, sizeof(sax));
    sax.startDocument = onStartDocument;
    sax.endDocument = onEndDocument;
    sax.comment = onComment;

    htmlParserCtxtPtr ctxt = makeCtxtWithHTML(&sax, &st, html);
    TEST_ASSERT_NOT_NULL(ctxt);

    int ret = htmlParseDocument(ctxt);
    TEST_ASSERT_EQUAL_INT(0, ret);

    /* Two comment callbacks expected: one for '<!x>' and one for '<?pi test>' */
    TEST_ASSERT_EQUAL_INT(2, st.commentCount);
    TEST_ASSERT_EQUAL_INT(1, st.startCount);
    TEST_ASSERT_EQUAL_INT(1, st.endCount);

    htmlFreeParserCtxt(ctxt);
}
/*
 * With ctxt->disableSAX set before parsing, the parse is still expected to
 * return success while none of the installed callbacks is invoked.
 */
void test_htmlParseDocument_disableSAX_suppresses_callbacks(void) {
    /*
     * The input carries a comment and a DOCTYPE so that the comment and
     * internalSubset handlers would fire if SAX were enabled. With an empty
     * string their zero counts would prove nothing about suppression.
     */
    const char *html = "<!--A--><!DOCTYPE html><html><body>Hello</body></html>";
    TestSAXState st = {0};
    xmlSAXHandler sax;

    memset(&sax, 0, sizeof(sax));
    sax.setDocumentLocator = onSetDocumentLocator;
    sax.startDocument = onStartDocument;
    sax.endDocument = onEndDocument;
    sax.comment = onComment;
    sax.internalSubset = onInternalSubset;

    htmlParserCtxtPtr ctxt = makeCtxtWithHTML(&sax, &st, html);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Disable SAX before parsing; all callbacks should be suppressed */
    ctxt->disableSAX = 1;

    int ret = htmlParseDocument(ctxt);
    /* Return should still indicate success */
    TEST_ASSERT_EQUAL_INT(0, ret);

    TEST_ASSERT_EQUAL_INT(0, st.locatorCount);
    TEST_ASSERT_EQUAL_INT(0, st.startCount);
    TEST_ASSERT_EQUAL_INT(0, st.endCount);
    TEST_ASSERT_EQUAL_INT(0, st.commentCount);
    TEST_ASSERT_EQUAL_INT(0, st.internalSubsetCount);

    htmlFreeParserCtxt(ctxt);
}
/* main */
/*
 * Test-runner entry point: runs the six htmlParseDocument test cases
 * defined in this file between UNITY_BEGIN() and UNITY_END(), and returns
 * the value produced by UNITY_END() as the process exit status.
 */
int main(void) {
UNITY_BEGIN();
/* Argument-validation cases. */
RUN_TEST(test_htmlParseDocument_null_ctxt_returns_minus1);
RUN_TEST(test_htmlParseDocument_null_input_in_ctxt_returns_minus1);
/* SAX callback cases. */
RUN_TEST(test_htmlParseDocument_calls_locator_start_end);
RUN_TEST(test_htmlParseDocument_parses_leading_comment_and_doctype);
RUN_TEST(test_htmlParseDocument_parses_bogus_comment_and_pi_as_comments);
RUN_TEST(test_htmlParseDocument_disableSAX_suppresses_callbacks);
return UNITY_END();
}