/*
 * libxml/tests/tests_HTMLparser_htmlParseChunk.c
 *
 * Uploaded by AryaWu using huggingface_hub (commit 6baed57, verified).
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/parser.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <string.h>
#include <stdlib.h>
/* Simple SAX state to record callbacks. An instance is handed to the parser
 * as user data (see create_push_ctxt) and updated by the test_* SAX handlers
 * in this file. */
typedef struct {
/* Number of times test_startDocument has been invoked. */
int startCount;
/* Number of times test_endDocument has been invoked. */
int endCount;
/* Number of times test_characters has been invoked. */
int charsCount;
/* Concatenated character data reported so far, kept NUL-terminated;
 * data that does not fit is truncated by test_characters. */
char collected[512];
/* Number of bytes currently stored in collected, not counting the NUL. */
size_t collectedLen;
} TestSAXState;
/* SAX callbacks */
/* SAX startDocument handler: counts the event in the TestSAXState that was
 * registered as user data. */
static void test_startDocument(void *ctx) {
    TestSAXState *const state = ctx;

    state->startCount += 1;
}
/* SAX endDocument handler: counts the event in the TestSAXState that was
 * registered as user data. */
static void test_endDocument(void *ctx) {
    TestSAXState *const state = ctx;

    state->endCount += 1;
}
/*
 * SAX characters handler: appends the reported bytes to st->collected and
 * counts the callback.
 *
 * ctx - the TestSAXState registered as user data
 * ch  - start of the character data
 * len - number of bytes to take from ch
 *
 * charsCount is incremented on every call, even when nothing is stored.
 * Data that does not fit in the remaining space is truncated so that
 * collected always stays NUL-terminated. A NULL ch or a non-positive len is
 * ignored: converting a negative len to size_t would otherwise turn it into
 * a huge value that gets clamped to "all remaining space" and makes memcpy
 * read far past the end of ch.
 */
static void test_characters(void *ctx, const xmlChar *ch, int len) {
    TestSAXState *st = (TestSAXState *)ctx;
    /* One byte is always reserved for the terminating NUL. */
    const size_t capacity = sizeof(st->collected) - 1;

    st->charsCount++;

    if (ch == NULL || len <= 0)
        return;
    if (st->collectedLen >= capacity)
        return; /* buffer already full */

    size_t room = capacity - st->collectedLen;
    size_t count = (size_t)len;
    if (count > room)
        count = room;

    memcpy(st->collected + st->collectedLen, ch, count);
    st->collectedLen += count;
    st->collected[st->collectedLen] = '\0';
}
/*
 * Helper: create an HTML push-parser context whose SAX handler forwards
 * startDocument, endDocument and characters events to the recording
 * callbacks above, with `state` as the user data. All other SAX slots are
 * left zeroed.
 *
 * Returns the result of htmlCreatePushParserCtxt unchanged; the tests check
 * it for NULL and release it with htmlFreeParserCtxt.
 *
 * NOTE(review): the handler table is a local of this function, so this
 * relies on htmlCreatePushParserCtxt copying it rather than keeping the
 * pointer - confirm against the libxml2 version in use.
 */
static htmlParserCtxtPtr create_push_ctxt(TestSAXState *state) {
    xmlSAXHandler handlers;

    memset(&handlers, 0, sizeof handlers);
    handlers.characters = test_characters;
    handlers.endDocument = test_endDocument;
    handlers.startDocument = test_startDocument;

    return htmlCreatePushParserCtxt(&handlers, state, NULL, 0, NULL,
                                    XML_CHAR_ENCODING_NONE);
}
/* Unity hooks */
/* Unity setUp hook: calls xmlInitParser() so the library is initialized
 * before the test body runs. */
void setUp(void) {
xmlInitParser();
}
/* Unity tearDown hook: intentionally empty. Each test frees its own parser
 * context, and xmlCleanupParser() is called once from main(). */
void tearDown(void) {
/* Nothing specific per-test */
}
/* Tests */
/*
 * htmlParseChunk is expected to answer XML_ERR_ARGUMENT for each kind of
 * bad argument: a NULL context, a NULL chunk combined with a positive size,
 * and a negative size.
 */
static void test_htmlParseChunk_invalid_arguments(void) {
    int rc;

    /* Case 1: no parser context at all */
    rc = htmlParseChunk(NULL, "x", 1, 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_ARGUMENT, rc);

    /* The remaining cases need a real context that owns an input buffer */
    TestSAXState state = {0};
    htmlParserCtxtPtr parser = create_push_ctxt(&state);
    TEST_ASSERT_NOT_NULL(parser);
    TEST_ASSERT_NOT_NULL(parser->input);
    TEST_ASSERT_NOT_NULL(parser->input->buf);

    /* Case 2: size > 0 but no data pointer */
    rc = htmlParseChunk(parser, NULL, 1, 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_ARGUMENT, rc);

    /* Case 3: size below zero */
    rc = htmlParseChunk(parser, "x", -5, 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_ARGUMENT, rc);

    htmlFreeParserCtxt(parser);
}
/*
 * Feeds a document in three pieces and checks how the SAX callbacks follow:
 *   1. a single "<" is expected to be too little input for startDocument
 *      to fire while terminate == 0;
 *   2. completing the tag with "html>" is expected to fire startDocument
 *      exactly once, without endDocument;
 *   3. "<body>Hi" is expected to deliver "Hi" through the characters
 *      callback.
 *
 * Every chunk length is derived with strlen so that exactly the visible
 * characters are pushed; a hand-counted size that is one too large would
 * feed the string literal's terminating NUL to the parser as document data.
 */
static void test_htmlParseChunk_partial_then_resume_starts_document(void) {
    const char *openBracket = "<";
    const char *restOfTag = "html>";
    const char *bodyText = "<body>Hi";

    TestSAXState st;
    memset(&st, 0, sizeof(st));
    htmlParserCtxtPtr ctxt = create_push_ctxt(&st);
    TEST_ASSERT_NOT_NULL(ctxt);

    /* Push a single byte: not enough to leave XML_PARSER_START when terminate==0 */
    int ret = htmlParseChunk(ctxt, openBracket, (int)strlen(openBracket), 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_OK, ret);
    TEST_ASSERT_EQUAL_INT(0, st.startCount);
    TEST_ASSERT_EQUAL_INT(0, st.endCount);

    /* Push the remainder of a simple start tag */
    ret = htmlParseChunk(ctxt, restOfTag, (int)strlen(restOfTag), 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_OK, ret);
    TEST_ASSERT_EQUAL_INT(1, st.startCount);
    TEST_ASSERT_EQUAL_INT(0, st.endCount);

    /* Start from an empty collection buffer so only text from the next
     * chunk can satisfy the strstr check below */
    st.collectedLen = 0;
    st.collected[0] = '\0';

    /* Push some character data to ensure characters callback fires */
    ret = htmlParseChunk(ctxt, bodyText, (int)strlen(bodyText), 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_OK, ret);
    TEST_ASSERT_TRUE(st.charsCount >= 1);
    TEST_ASSERT_NOT_NULL(strstr(st.collected, "Hi"));

    htmlFreeParserCtxt(ctxt);
}
/*
 * Pushes an unfinished document, then calls htmlParseChunk with
 * terminate == 1 and no further data. The test expects endDocument to be
 * reported exactly once, and only after the terminating call.
 */
static void test_htmlParseChunk_terminate_calls_endDocument(void) {
    static const char unfinished[] = "<!DOCTYPE html><html><body>Hello";

    TestSAXState state = {0};
    htmlParserCtxtPtr parser = create_push_ctxt(&state);
    TEST_ASSERT_NOT_NULL(parser);

    /* The open elements are never closed by the input itself */
    int rc = htmlParseChunk(parser, unfinished, (int)strlen(unfinished), 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_OK, rc);
    TEST_ASSERT_EQUAL_INT(1, state.startCount);
    TEST_ASSERT_EQUAL_INT(0, state.endCount);

    /* Signal end of input with an empty final chunk */
    rc = htmlParseChunk(parser, NULL, 0, 1);
    TEST_ASSERT_EQUAL_INT(XML_ERR_OK, rc);
    TEST_ASSERT_EQUAL_INT(1, state.endCount);

    htmlFreeParserCtxt(parser);
}
/*
 * After xmlStopParser() has been called on the context, a later
 * htmlParseChunk call is expected to return XML_ERR_USER_STOP.
 */
static void test_htmlParseChunk_respects_stop_parser(void) {
    TestSAXState state = {0};
    htmlParserCtxtPtr parser = create_push_ctxt(&state);
    TEST_ASSERT_NOT_NULL(parser);

    /* Request the stop through the public API before any data is pushed */
    xmlStopParser(parser);

    const int rc = htmlParseChunk(parser, "<html>", 6, 0);
    TEST_ASSERT_EQUAL_INT(XML_ERR_USER_STOP, rc);

    htmlFreeParserCtxt(parser);
}
/*
 * Test runner entry point: runs every htmlParseChunk test under Unity,
 * then calls xmlCleanupParser() and returns the value of UNITY_END().
 */
int main(void) {
    UNITY_BEGIN();

    RUN_TEST(test_htmlParseChunk_invalid_arguments);
    RUN_TEST(test_htmlParseChunk_partial_then_resume_starts_document);
    RUN_TEST(test_htmlParseChunk_terminate_calls_endDocument);
    RUN_TEST(test_htmlParseChunk_respects_stop_parser);

    /* Take Unity's result before the library cleanup call */
    const int unityResult = UNITY_END();

    xmlCleanupParser();
    return unityResult;
}