/*
 * libxml / tests /tests_HTMLparser_htmlParseDocTypeDecl.c
 * AryaWu's picture
 * Upload folder using huggingface_hub
 * 6baed57 verified
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Wrapper for the static function under test (provided in module).
 * Only declared here; the definition lives outside this file and is
 * resolved at link time. Each test below calls it on a context built by
 * make_ctxt() and then inspects the captured SAX callback arguments.
 */
extern void test_htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt);
/*
 * Capture of SAX internalSubset callback parameters.
 * Written by capture_internalSubset(), read by the test assertions, and
 * released/zeroed by capture_reset(). The three strings are xmlStrdup()
 * copies (or NULL when the callback received NULL).
 */
static int cap_called = 0;          /* number of callback invocations */
static xmlChar *cap_name = NULL;    /* copy of the `name` argument */
static xmlChar *cap_public = NULL;  /* copy of the `ExternalID` argument */
static xmlChar *cap_system = NULL;  /* copy of the `SystemID` argument */
static void capture_reset(void) {
if (cap_name) { xmlFree(cap_name); cap_name = NULL; }
if (cap_public) { xmlFree(cap_public); cap_public = NULL; }
if (cap_system) { xmlFree(cap_system); cap_system = NULL; }
cap_called = 0;
}
/*
 * SAX internalSubset handler used by the tests.
 *
 * Counts the invocation and stores private copies of the three arguments in
 * cap_name / cap_public / cap_system (NULL stays NULL).
 *
 * Any strings left over from an earlier invocation are released first, so a
 * parser that fires the callback more than once cannot leak the previous
 * copies; the globals always describe the most recent call.
 *
 * ctx        - SAX user data; unused.
 * name       - DOCTYPE name, may be NULL.
 * ExternalID - public identifier, may be NULL.
 * SystemID   - system identifier, may be NULL.
 */
static void capture_internalSubset(void *ctx,
                                   const xmlChar *name,
                                   const xmlChar *ExternalID,
                                   const xmlChar *SystemID) {
    (void)ctx;
    cap_called++;

    /* Drop captures from a previous invocation before overwriting them. */
    if (cap_name != NULL) {
        xmlFree(cap_name);
        cap_name = NULL;
    }
    if (cap_public != NULL) {
        xmlFree(cap_public);
        cap_public = NULL;
    }
    if (cap_system != NULL) {
        xmlFree(cap_system);
        cap_system = NULL;
    }

    cap_name = (name != NULL) ? xmlStrdup(name) : NULL;
    cap_public = (ExternalID != NULL) ? xmlStrdup(ExternalID) : NULL;
    cap_system = (SystemID != NULL) ? xmlStrdup(SystemID) : NULL;
}
/*
 * Install the capturing SAX callbacks on a parser context.
 *
 * *sax is initialised to an otherwise-empty SAX2 handler whose only callback
 * is capture_internalSubset, and that configuration is then copied into the
 * handler the context already owns.
 *
 * The copy matters: libxml2's context teardown releases ctxt->sax with
 * xmlFree(). Pointing ctxt->sax at the caller's (stack-allocated) handler
 * would both leak the handler allocated at context creation and later hand
 * a non-heap pointer to xmlFree(). Copying keeps ownership where it was.
 *
 * ctxt - parser context to configure; SAX delivery is re-enabled on it.
 * sax  - caller-provided scratch handler; overwritten with the test setup.
 *
 * If the context has no handler of its own, nothing is installed; no
 * callback will fire and the calling test's cap_called assertion reports it.
 */
static void attach_sax(htmlParserCtxtPtr ctxt, xmlSAXHandler *sax) {
    memset(sax, 0, sizeof(*sax));
    sax->initialized = XML_SAX2_MAGIC;
    sax->internalSubset = capture_internalSubset;

    /* Copy into the context-owned handler instead of replacing the pointer. */
    if (ctxt->sax != NULL) {
        *ctxt->sax = *sax;
    }

    ctxt->disableSAX = 0;
}
/*
 * Build an in-memory HTML parser context for a test.
 *
 * input   - NUL-terminated markup to parse; its strlen() is used as the size.
 * options - parser option bits; applied only when non-zero.
 * saxOut  - caller-provided handler storage passed on to attach_sax().
 *
 * Returns the new context (asserted non-NULL). The tests release it with
 * htmlFreeParserCtxt().
 */
static htmlParserCtxtPtr make_ctxt(const char *input, int options, xmlSAXHandler *saxOut) {
    const int length = (int)strlen(input);
    htmlParserCtxtPtr parser = htmlCreateMemoryParserCtxt(input, length);

    TEST_ASSERT_NOT_NULL_MESSAGE(parser, "Failed to create HTML parser context");

    if (options != 0) {
        /* The result is deliberately ignored, as in the original helper. */
        (void)htmlCtxtUseOptions(parser, options);
    }

    attach_sax(parser, saxOut);
    return parser;
}
/*
 * Unity per-test setup: initialise libxml2 first, then clear whatever the
 * SAX capture globals still hold.
 */
void setUp(void)
{
    xmlInitParser();
    capture_reset();
}
/*
 * Unity per-test teardown: free the strings captured during the test.
 * No global libxml2 cleanup is performed here.
 */
void tearDown(void)
{
    capture_reset();
}
/* Helpers for assertions */
static void assert_xml_equals(const xmlChar *actual, const char *expected) {
TEST_ASSERT_NOT_NULL_MESSAGE(actual, "Expected non-NULL xmlChar*");
TEST_ASSERT_TRUE_MESSAGE(xmlStrEqual(actual, BAD_CAST expected), "xmlChar string mismatch");
}
/*
 * A plain "<!DOCTYPE html>" must fire internalSubset exactly once with the
 * name "html" and no public or system identifier.
 */
void test_htmlParseDocTypeDecl_basic_invokes_callback(void) {
    xmlSAXHandler handler;
    const char *markup = "<!DOCTYPE html>";
    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);

    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    assert_xml_equals(cap_name, "html");
    TEST_ASSERT_NULL(cap_public);
    TEST_ASSERT_NULL(cap_system);

    htmlFreeParserCtxt(parser);
}
/*
 * With HTML_PARSE_HTML5 set, an upper-case DOCTYPE name is expected to be
 * reported to the callback in lower case.
 */
void test_htmlParseDocTypeDecl_html5_lowers_name(void) {
    xmlSAXHandler handler;
    const char *markup = "<!DOCTYPE HTML>";
    htmlParserCtxtPtr parser = make_ctxt(markup, HTML_PARSE_HTML5, &handler);

    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    assert_xml_equals(cap_name, "html");
    TEST_ASSERT_NULL(cap_public);
    TEST_ASSERT_NULL(cap_system);

    htmlFreeParserCtxt(parser);
}
/*
 * Without the HTML5 option, the DOCTYPE name is expected to reach the
 * callback with its original mixed case intact.
 */
void test_htmlParseDocTypeDecl_no_html5_preserves_case(void) {
    xmlSAXHandler handler;
    const char *markup = "<!DOCTYPE Html>";
    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);

    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    assert_xml_equals(cap_name, "Html");
    TEST_ASSERT_NULL(cap_public);
    TEST_ASSERT_NULL(cap_system);

    htmlFreeParserCtxt(parser);
}
/*
 * A DOCTYPE carrying both a PUBLIC identifier and a system identifier must
 * report both strings, plus the name, to the callback.
 */
void test_htmlParseDocTypeDecl_public_and_system_ids(void) {
    const char *public_id = "-//W3C//DTD HTML 4.01//EN";
    const char *system_id = "http://www.w3.org/TR/html4/strict.dtd";
    char markup[512];
    xmlSAXHandler handler;

    /* Assemble the declaration from the two identifiers. */
    snprintf(markup, sizeof(markup), "<!DOCTYPE html PUBLIC \"%s\" \"%s\">", public_id, system_id);

    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);
    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    assert_xml_equals(cap_name, "html");
    assert_xml_equals(cap_public, public_id);
    assert_xml_equals(cap_system, system_id);

    htmlFreeParserCtxt(parser);
}
/*
 * A DOCTYPE with only a SYSTEM identifier must report that identifier and
 * leave the public identifier NULL.
 */
void test_htmlParseDocTypeDecl_system_only(void) {
    const char *system_id = "about:legacy-compat";
    char markup[256];
    xmlSAXHandler handler;

    snprintf(markup, sizeof(markup), "<!DOCTYPE html SYSTEM \"%s\">", system_id);

    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);
    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    assert_xml_equals(cap_name, "html");
    TEST_ASSERT_NULL(cap_public);
    assert_xml_equals(cap_system, system_id);

    htmlFreeParserCtxt(parser);
}
/*
 * A DOCTYPE with no name at all is still expected to fire the callback
 * once, with all three arguments NULL.
 */
void test_htmlParseDocTypeDecl_missing_name(void) {
    xmlSAXHandler handler;
    const char *markup = "<!DOCTYPE>";
    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);

    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    TEST_ASSERT_NULL(cap_name);
    TEST_ASSERT_NULL(cap_public);
    TEST_ASSERT_NULL(cap_system);

    htmlFreeParserCtxt(parser);
}
/*
 * Input that ends before the closing '>' (a bogus doctype) is still
 * expected to produce one callback carrying the name and both identifiers.
 */
void test_htmlParseDocTypeDecl_bogus_missing_gt(void) {
    xmlSAXHandler handler;
    const char *markup = "<!DOCTYPE html PUBLIC \"abc\" \"def\"";
    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);

    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);
    assert_xml_equals(cap_name, "html");
    assert_xml_equals(cap_public, "abc");
    assert_xml_equals(cap_system, "def");

    htmlFreeParserCtxt(parser);
}
/*
 * After the declaration is consumed, the input cursor is expected to sit on
 * the '<' that opens the following <html> tag.
 */
void test_htmlParseDocTypeDecl_advances_past_gt(void) {
    xmlSAXHandler handler;
    const char *markup = "<!DOCTYPE html><html>";
    htmlParserCtxtPtr parser = make_ctxt(markup, 0, &handler);

    test_htmlParseDocTypeDecl(parser);

    TEST_ASSERT_EQUAL_INT(1, cap_called);

    /* Check each pointer on the way to the cursor before dereferencing it. */
    TEST_ASSERT_NOT_NULL(parser->input);
    TEST_ASSERT_NOT_NULL(parser->input->cur);
    TEST_ASSERT_TRUE_MESSAGE(parser->input->cur[0] == '<', "Parser cursor not advanced past '>' as expected");

    htmlFreeParserCtxt(parser);
}
/*
 * Unity runner: executes every DOCTYPE test in a fixed order and returns
 * the value produced by UNITY_END() as the process exit status.
 */
int main(void)
{
    UNITY_BEGIN();

    /* Name handling */
    RUN_TEST(test_htmlParseDocTypeDecl_basic_invokes_callback);
    RUN_TEST(test_htmlParseDocTypeDecl_html5_lowers_name);
    RUN_TEST(test_htmlParseDocTypeDecl_no_html5_preserves_case);

    /* Public / system identifiers */
    RUN_TEST(test_htmlParseDocTypeDecl_public_and_system_ids);
    RUN_TEST(test_htmlParseDocTypeDecl_system_only);

    /* Malformed input and cursor position */
    RUN_TEST(test_htmlParseDocTypeDecl_missing_name);
    RUN_TEST(test_htmlParseDocTypeDecl_bogus_missing_gt);
    RUN_TEST(test_htmlParseDocTypeDecl_advances_past_gt);

    const int status = UNITY_END();
    return status;
}