/*
 * libxml/tests/tests_HTMLparser_htmlParseTryOrFinish.c
 *
 * Hosting-page residue kept for provenance:
 *   AryaWu - "Upload folder using huggingface_hub" - 6baed57 (verified)
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/parser.h>
#include <stdlib.h>
#include <string.h>
/*
 * Wrapper for the static function under test. Only the declaration lives in
 * this file; the definition is provided in the module itself.
 * Every test below calls it with a parser context and a terminate flag of
 * 0 or 1.
 */
extern void test_htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate);
/* Helpers */
/*
 * Create an HTML parser context over the in-memory buffer `buf` of `len`
 * bytes and switch it to permissive options.
 *
 * The current test fails (via a Unity assertion) if the context cannot be
 * created. The caller owns the returned context and must release it with
 * htmlFreeParserCtxt().
 */
static htmlParserCtxtPtr make_ctx_from_mem(const char *buf, int len) {
    /* Permissive option set so parser diagnostics do not disturb the test. */
    const int lenient_opts =
        HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING;
    htmlParserCtxtPtr parser = htmlCreateMemoryParserCtxt(buf, len);

    TEST_ASSERT_NOT_NULL_MESSAGE(parser, "Failed to create HTML parser context");

    /* The return value of the option call is deliberately discarded. */
    (void)htmlCtxtUseOptions(parser, lenient_opts);
    return parser;
}
/*
 * Distance, in bytes, from the start of the current input buffer (`base`)
 * to the read cursor (`cur`).
 */
static size_t cur_offset(htmlParserCtxtPtr ctxt) {
    size_t consumed = (size_t)(ctxt->input->cur - ctxt->input->base);

    return consumed;
}
/*
 * Distance, in bytes, from the start of the current input buffer (`base`)
 * to its `end` pointer.
 */
static size_t end_offset(htmlParserCtxtPtr ctxt) {
    size_t total = (size_t)(ctxt->input->end - ctxt->input->base);

    return total;
}
/* Unity hooks */
/* Unity per-test setup hook: (re)initialize the parser library before each test. */
void setUp(void)
{
    xmlInitParser();
}
/*
 * Unity per-test teardown hook. Intentionally empty: each test frees the
 * parser context it created, and no global cleanup is performed here.
 */
void tearDown(void)
{
}
/* Tests */
/*
 * START state, terminate == 0, fewer than 4 bytes available:
 * the call is expected to return without changing state or moving the cursor.
 */
void test_htmlParseTryOrFinish_start_nonterminate_small_avail_returns_early(void) {
    /* Three bytes of input, i.e. fewer than 4. */
    static const char input[] = "abc";
    htmlParserCtxtPtr parser = make_ctx_from_mem(input, (int)(sizeof(input) - 1));
    size_t offset_before;

    /* Precondition: a freshly created context starts in START. */
    TEST_ASSERT_EQUAL_INT(XML_PARSER_START, parser->instate);
    offset_before = cur_offset(parser);

    test_htmlParseTryOrFinish(parser, 0);

    /* Neither the state nor the read position may have changed. */
    TEST_ASSERT_EQUAL_INT(XML_PARSER_START, parser->instate);
    TEST_ASSERT_EQUAL_size_t(offset_before, cur_offset(parser));

    htmlFreeParserCtxt(parser);
}
/*
 * Same short plain-text input as the non-terminate case, but with
 * terminate == 1: the parser is expected to move to CONTENT and consume
 * everything.
 */
void test_htmlParseTryOrFinish_start_terminate_progresses_and_consumes_text(void) {
    static const char input[] = "abc";
    htmlParserCtxtPtr parser = make_ctx_from_mem(input, (int)(sizeof(input) - 1));
    /* Capture the end offset before parsing, as the reference position. */
    const size_t expected_offset = end_offset(parser);

    test_htmlParseTryOrFinish(parser, 1);

    /* State advanced to CONTENT and the cursor reached the recorded end. */
    TEST_ASSERT_EQUAL_INT(XML_PARSER_CONTENT, parser->instate);
    TEST_ASSERT_EQUAL_size_t(expected_offset, cur_offset(parser));

    /* Plain text is expected to leave endCheckState at 0. */
    TEST_ASSERT_EQUAL_INT(0, parser->endCheckState);

    htmlFreeParserCtxt(parser);
}
/*
 * Incomplete start tag with terminate == 0: the parser is expected to stop
 * in START_TAG, remember a rescan position in checkIndex, and leave
 * unconsumed input behind.
 *
 * Fix: the input literal had lost three of its four leading spaces, leaving
 * a 3-byte buffer. With terminate == 0, fewer than 4 bytes keeps the parser
 * in START (see the small_avail_returns_early test), so START_TAG could
 * never be reached.
 */
void test_htmlParseTryOrFinish_misc_to_start_tag_partial_nonterminate(void) {
    /*
     * Four leading spaces bring the buffer to 6 bytes, enough to get past
     * START; "<p" then opens a start tag whose closing '>' never arrives.
     */
    const char *data = "    <p";
    htmlParserCtxtPtr ctxt = make_ctx_from_mem(data, (int)strlen(data));

    test_htmlParseTryOrFinish(ctxt, 0);

    /* With terminate == 0 and no '>', the call should return in START_TAG. */
    TEST_ASSERT_EQUAL_INT(XML_PARSER_START_TAG, ctxt->instate);

    /* The lookup for '>' is expected to leave a non-zero checkIndex for rescanning. */
    TEST_ASSERT_TRUE(ctxt->checkIndex != 0);

    /*
     * The leading whitespace may or may not have been skipped on the way
     * through MISC, so only require that the cursor has not reached the end
     * (i.e. the partial tag was not consumed).
     */
    TEST_ASSERT_TRUE(cur_offset(ctxt) < end_offset(ctxt));

    htmlFreeParserCtxt(ctxt);
}
/*
 * A complete comment with terminate == 1: the whole comment is expected to
 * be consumed and the parser must not end up in the EOF state.
 */
void test_htmlParseTryOrFinish_parses_comment_and_advances(void) {
    static const char markup[] = "<!--x-->";
    htmlParserCtxtPtr parser = make_ctx_from_mem(markup, (int)(sizeof(markup) - 1));
    const size_t expected_offset = end_offset(parser);

    test_htmlParseTryOrFinish(parser, 1);

    /* The cursor sits at the recorded end once the comment is consumed. */
    TEST_ASSERT_EQUAL_size_t(expected_offset, cur_offset(parser));

    /* The exact resulting state is not pinned here; only EOF is ruled out. */
    TEST_ASSERT_NOT_EQUAL_INT(XML_PARSER_EOF, parser->instate);

    htmlFreeParserCtxt(parser);
}
/*
 * Start tag, text and end tag in one buffer with terminate == 1: everything
 * is expected to be consumed, ending in CONTENT with endCheckState at 0.
 */
void test_htmlParseTryOrFinish_parses_simple_element_and_end_tag_full_terminate(void) {
    const char *markup = "<p>Hi</p>";
    const size_t markup_len = strlen(markup);
    htmlParserCtxtPtr parser = make_ctx_from_mem(markup, (int)markup_len);
    const size_t expected_offset = end_offset(parser);

    test_htmlParseTryOrFinish(parser, 1);

    /* The entire input was read. */
    TEST_ASSERT_EQUAL_size_t(expected_offset, cur_offset(parser));

    /* After the tags and text, the parser is back in CONTENT. */
    TEST_ASSERT_EQUAL_INT(XML_PARSER_CONTENT, parser->instate);
    TEST_ASSERT_EQUAL_INT(0, parser->endCheckState);

    htmlFreeParserCtxt(parser);
}
/*
 * A lone end tag with terminate == 1: the tag is expected to be consumed in
 * full and the parser to end up in CONTENT.
 */
void test_htmlParseTryOrFinish_end_tag_only_transitions_and_consumes(void) {
    static const char markup[] = "</p>";
    htmlParserCtxtPtr parser = make_ctx_from_mem(markup, (int)(sizeof(markup) - 1));
    const size_t expected_offset = end_offset(parser);

    test_htmlParseTryOrFinish(parser, 1);

    /* The cursor reached the end of the buffer. */
    TEST_ASSERT_EQUAL_size_t(expected_offset, cur_offset(parser));

    /* Processing the end tag returns the parser to CONTENT. */
    TEST_ASSERT_EQUAL_INT(XML_PARSER_CONTENT, parser->instate);

    htmlFreeParserCtxt(parser);
}
/* Main */
/*
 * Test runner entry point: runs every test case in this file under Unity
 * and returns the value produced by UNITY_END().
 */
int main(void)
{
    int unity_result;

    UNITY_BEGIN();

    /* START-state behaviour with and without terminate. */
    RUN_TEST(test_htmlParseTryOrFinish_start_nonterminate_small_avail_returns_early);
    RUN_TEST(test_htmlParseTryOrFinish_start_terminate_progresses_and_consumes_text);

    /* Partial start tag without terminate. */
    RUN_TEST(test_htmlParseTryOrFinish_misc_to_start_tag_partial_nonterminate);

    /* Complete constructs with terminate set. */
    RUN_TEST(test_htmlParseTryOrFinish_parses_comment_and_advances);
    RUN_TEST(test_htmlParseTryOrFinish_parses_simple_element_and_end_tag_full_terminate);
    RUN_TEST(test_htmlParseTryOrFinish_end_tag_only_transitions_and_consumes);

    unity_result = UNITY_END();
    return unity_result;
}