/*
 * libxml/tests/tests_HTMLparser_htmlParseEndTag.c
 *
 * Listing metadata: uploaded by AryaWu using huggingface_hub,
 * revision 6baed57 (verified).
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/xmlmemory.h>
#include <libxml/xmlerror.h>
#include <string.h>
#include <stdio.h>
/*
 * Entry point exercised by every test in this file.
 * The function under test is static, so the parser module is expected to
 * provide this wrapper; it is only declared here and must be resolved at
 * link time.
 */
void test_htmlParseEndTag(htmlParserCtxtPtr ctxt);
/*
 * Test SAX data capture.
 * Records how often each SAX callback fired and the most recent payload it
 * received, so tests can assert on the events produced by the parser.
 */
typedef struct {
/* Number of cb_endElement invocations. */
int end_count;
/* Number of cb_characters invocations. */
int char_count;
/* Number of cb_comment invocations. */
int comment_count;
/* Most recent element name seen by cb_endElement (NUL-terminated, truncated to fit). */
char last_end_name[128];
/* Most recent text seen by cb_characters (NUL-terminated, truncated to fit). */
char last_chars[256];
/* Most recent comment seen by cb_comment (NUL-terminated, truncated to fit). */
char last_comment[256];
} TestSAXData;
/* Single shared capture record; reset_sax_data() clears it and the tests read it. */
static TestSAXData gSAXData;
/*
 * SAX endElement callback: counts the event and remembers the element name.
 *
 * ctx  - user data pointer; must point at a TestSAXData record.
 * name - element name as a NUL-terminated string, or NULL.
 *
 * The name is truncated if necessary so that it always fits, NUL-terminated,
 * in last_end_name. A NULL name clears last_end_name, the same way
 * cb_characters and cb_comment treat a missing payload, so a stale name from
 * an earlier event can never be mistaken for the current one.
 */
static void cb_endElement(void *ctx, const xmlChar *name) {
    TestSAXData *d = (TestSAXData *)ctx;

    d->end_count++;

    if (name == NULL) {
        d->last_end_name[0] = '\0';
        return;
    }

    size_t n = strlen((const char *)name);
    if (n >= sizeof(d->last_end_name)) {
        /* Leave room for the terminator. */
        n = sizeof(d->last_end_name) - 1;
    }
    memcpy(d->last_end_name, name, n);
    d->last_end_name[n] = '\0';
}
/*
 * SAX characters callback: counts the event and stores the text it carried.
 *
 * ctx - user data pointer; must point at a TestSAXData record.
 * ch  - text bytes (not required to be NUL-terminated), or NULL.
 * len - number of bytes available at ch.
 *
 * At most sizeof(last_chars) - 1 bytes are kept and the stored copy is always
 * NUL-terminated. A NULL pointer or a non-positive length stores an empty
 * string.
 */
static void cb_characters(void *ctx, const xmlChar *ch, int len) {
    TestSAXData *capture = (TestSAXData *)ctx;
    const size_t capacity = sizeof(capture->last_chars) - 1;
    size_t copied = 0;

    capture->char_count++;

    if (ch != NULL && len > 0) {
        copied = (size_t)len;
        if (copied > capacity) {
            copied = capacity;
        }
        memcpy(capture->last_chars, ch, copied);
    }

    /* copied is still 0 when there was nothing to copy, giving "". */
    capture->last_chars[copied] = '\0';
}
/*
 * SAX comment callback: counts the event and stores the comment text.
 *
 * ctx   - user data pointer; must point at a TestSAXData record.
 * value - comment content as a NUL-terminated string, or NULL.
 *
 * The stored copy is truncated to fit last_comment and is always
 * NUL-terminated; a NULL value stores an empty string.
 */
static void cb_comment(void *ctx, const xmlChar *value) {
    TestSAXData *capture = (TestSAXData *)ctx;

    capture->comment_count++;

    if (value == NULL) {
        capture->last_comment[0] = '\0';
        return;
    }

    const size_t capacity = sizeof(capture->last_comment) - 1;
    size_t copied = strlen((const char *)value);
    if (copied > capacity) {
        copied = capacity;
    }
    memcpy(capture->last_comment, value, copied);
    capture->last_comment[copied] = '\0';
}
/* SAX handler table shared by all tests; filled in by init_sax() and used by make_ctxt_with_input() when building a parser context. */
static xmlSAXHandler gSAX;
/* Helpers */
static void reset_sax_data(void) {
memset(&gSAXData, 0, sizeof(gSAXData));
gSAXData.last_end_name[0] = '\0';
gSAXData.last_chars[0] = '\0';
gSAXData.last_comment[0] = '\0';
}
static htmlParserCtxtPtr make_ctxt_with_input(const char *text, int options) {
htmlParserCtxtPtr ctxt = htmlNewParserCtxt();
TEST_ASSERT_NOT_NULL(ctxt);
ctxt->options = options;
ctxt->sax = &gSAX;
ctxt->userData = &gSAXData;
ctxt->disableSAX = 0;
if (ctxt->dict == NULL) {
ctxt->dict = xmlDictCreate();
TEST_ASSERT_NOT_NULL(ctxt->dict);
}
xmlParserInputPtr in = xmlNewInputStream(ctxt);
TEST_ASSERT_NOT_NULL(in);
size_t len = strlen(text);
xmlChar *buf = xmlStrndup((const xmlChar *)text, (int)len);
TEST_ASSERT_NOT_NULL(buf);
in->base = buf;
in->cur = buf;
in->end = buf + len;
in->line = 1;
in->col = 1;
in->consumed = 0;
inputPush(ctxt, in);
return ctxt;
}
/*
 * Replaces the context's open-element name stack with the given names.
 *
 * ctxt  - parser context whose nameTab/nameNr/nameMax/name are rewritten.
 * names - array of `count` strings; the pointers are stored as-is (not
 *         copied), so they must outlive the context's use of them. The tests
 *         pass string literals.
 * count - number of entries; names[count - 1] becomes the current element.
 *
 * The table is (re)allocated only when it is missing or too small, and
 * always has room for at least one entry.
 */
static void ensure_name_stack(htmlParserCtxtPtr ctxt, const char **names, int count) {
    const int capacity = (count > 0) ? count : 1;
    const int must_allocate = (ctxt->nameTab == NULL) || (ctxt->nameMax < count);

    if (must_allocate) {
        if (ctxt->nameTab != NULL) {
            xmlFree((void *)ctxt->nameTab);
        }
        ctxt->nameTab = (const xmlChar **)xmlMalloc(sizeof(xmlChar *) * capacity);
        TEST_ASSERT_NOT_NULL(ctxt->nameTab);
        ctxt->nameMax = capacity;
    }

    int slot = 0;
    while (slot < count) {
        ctxt->nameTab[slot] = (const xmlChar *)names[slot]; /* use string literals */
        slot++;
    }

    ctxt->nameNr = count;
    ctxt->name = NULL;
    if (count > 0) {
        ctxt->name = ctxt->nameTab[count - 1];
    }
}
/* Unity setup/teardown */
/* Unity per-test hook: clears the shared capture record so counts and strings from a previous test cannot carry over. */
void setUp(void) {
reset_sax_data();
}
/* Unity per-test hook; intentionally empty because each test frees its own parser context. */
void tearDown(void) {
/* nothing */
}
static void init_sax(void) {
memset(&gSAX, 0, sizeof(gSAX));
gSAX.endElement = cb_endElement;
gSAX.characters = cb_characters;
gSAX.comment = cb_comment;
}
/* Tests */
/*
 * Input "</" ends right after the slash.
 * Expects exactly one characters event carrying the literal "</", and no
 * endElement or comment events.
 */
void test_htmlParseEndTag_eof_after_slash_emits_literal_chars(void) {
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</", 0);

    test_htmlParseEndTag(parser);

    TEST_ASSERT_EQUAL_INT(1, gSAXData.char_count);
    TEST_ASSERT_EQUAL_STRING("</", gSAXData.last_chars);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</>" closes immediately without a tag name.
 * Expects no characters, endElement or comment events at all.
 */
void test_htmlParseEndTag_immediate_gt_noop(void) {
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</>", 0);

    test_htmlParseEndTag(parser);

    TEST_ASSERT_EQUAL_INT(0, gSAXData.char_count);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</123abc>" has a non-letter right after "</".
 * Expects the content to be reported as a single comment "123abc" and no
 * endElement event.
 */
void test_htmlParseEndTag_bogus_comment_non_letter(void) {
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</123abc>", 0);

    test_htmlParseEndTag(parser);

    TEST_ASSERT_EQUAL_INT(1, gSAXData.comment_count);
    TEST_ASSERT_EQUAL_STRING("123abc", gSAXData.last_comment);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</DiV>" with HTML_PARSE_HTML5.
 * Expects one endElement event whose name is lower-cased to "div", and no
 * comment event.
 */
void test_htmlParseEndTag_html5_simple_lowercase_name(void) {
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</DiV>", HTML_PARSE_HTML5);

    test_htmlParseEndTag(parser);

    TEST_ASSERT_EQUAL_INT(1, gSAXData.end_count);
    TEST_ASSERT_EQUAL_STRING("div", gSAXData.last_end_name);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</SPAN class=foo />" with HTML_PARSE_HTML5: an end tag followed by
 * an attribute and a trailing solidus.
 * Expects one endElement event named "span".
 */
void test_htmlParseEndTag_html5_with_attrs_and_solidus(void) {
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</SPAN class=foo />", HTML_PARSE_HTML5);

    test_htmlParseEndTag(parser);

    TEST_ASSERT_EQUAL_INT(1, gSAXData.end_count);
    TEST_ASSERT_EQUAL_STRING("span", gSAXData.last_end_name);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</body>" without HTML_PARSE_HTML5, starting from depth 3.
 * Expects the context depth to drop to 2 and neither an endElement nor a
 * comment event to be emitted.
 */
void test_htmlParseEndTag_nonhtml5_special_depth_decrement(void) {
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</body>", 0);
    parser->depth = 3;

    test_htmlParseEndTag(parser);

    TEST_ASSERT_EQUAL_INT(2, parser->depth);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count);
    TEST_ASSERT_EQUAL_INT(0, gSAXData.comment_count);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</table>" without HTML_PARSE_HTML5 while the open-element stack
 * holds only "p" and "div".
 * Expects no endElement event. The error report itself is not inspected.
 */
void test_htmlParseEndTag_nonhtml5_name_not_in_stack_reports_error_and_returns(void) {
    /* Open elements, outermost first; deliberately lacks "table". */
    const char *open_elements[] = { "p", "div" };
    htmlParserCtxtPtr parser;

    init_sax();
    parser = make_ctxt_with_input("</table>", 0);
    ensure_name_stack(parser, open_elements, 2);

    test_htmlParseEndTag(parser);

    /* We cannot reliably assert error structures here, but endElement must not be called. */
    TEST_ASSERT_EQUAL_INT(0, gSAXData.end_count);

    htmlFreeParserCtxt(parser);
}
/*
 * Input "</EM>" without HTML_PARSE_HTML5 while "em" is the only open element.
 * Expects one endElement event named "em" and the name stack to shrink by
 * one entry.
 */
void test_htmlParseEndTag_nonhtml5_matching_close_emits_end_and_pops(void) {
    init_sax();
    htmlParserCtxtPtr ctxt = make_ctxt_with_input("</EM>", 0);
    const char *stack[] = { "em" };
    ensure_name_stack(ctxt, stack, 1);
    int before = ctxt->nameNr;

    test_htmlParseEndTag(ctxt);

    TEST_ASSERT_EQUAL_INT(1, gSAXData.end_count);
    TEST_ASSERT_EQUAL_STRING("em", gSAXData.last_end_name);
    /* The matching close must pop one name. An integer equality assertion is
     * used so a failure reports both the expected and the actual depth. */
    TEST_ASSERT_EQUAL_INT(before - 1, ctxt->nameNr);

    htmlFreeParserCtxt(ctxt);
}
/* Main */
/*
 * Test runner: initialises libxml2, runs every test case under Unity, cleans
 * up the library and returns the value produced by UNITY_END() as the
 * process exit status.
 */
int main(void) {
    int status;

    xmlInitParser();
    UNITY_BEGIN();

    /* Malformed or degenerate end tags. */
    RUN_TEST(test_htmlParseEndTag_eof_after_slash_emits_literal_chars);
    RUN_TEST(test_htmlParseEndTag_immediate_gt_noop);
    RUN_TEST(test_htmlParseEndTag_bogus_comment_non_letter);

    /* HTML_PARSE_HTML5 mode. */
    RUN_TEST(test_htmlParseEndTag_html5_simple_lowercase_name);
    RUN_TEST(test_htmlParseEndTag_html5_with_attrs_and_solidus);

    /* Default (non-HTML5) mode. */
    RUN_TEST(test_htmlParseEndTag_nonhtml5_special_depth_decrement);
    RUN_TEST(test_htmlParseEndTag_nonhtml5_name_not_in_stack_reports_error_and_returns);
    RUN_TEST(test_htmlParseEndTag_nonhtml5_matching_close_emits_end_and_pops);

    status = UNITY_END();
    xmlCleanupParser();
    return status;
}